Repository 'bakta'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/bakta

Changeset 0:1a27ad3d0cdf (2022-09-01)
Next changeset 1:da5f1924bb2e (2022-09-16)
Commit message:
planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit 7d595b05b8d595f554b57dadbf1beb0b39733af3
added:
bakta.xml
macro.xml
test-data/NC_002127.1.fna
test-data/TEST_1/TEST_1.embl
test-data/TEST_1/TEST_1.faa
test-data/TEST_1/TEST_1.ffn
test-data/TEST_1/TEST_1.fna
test-data/TEST_1/TEST_1.gbff
test-data/TEST_1/TEST_1.gff3
test-data/TEST_1/TEST_1.hypotheticals.faa
test-data/TEST_1/TEST_1.hypotheticals.tsv
test-data/TEST_1/TEST_1.json
test-data/TEST_1/TEST_1.log
test-data/TEST_1/TEST_1.tsv
test-data/TEST_1/TEST_1.txt
test-data/TEST_2/TEST_2.embl
test-data/TEST_2/TEST_2.faa
test-data/TEST_2/TEST_2.ffn
test-data/TEST_2/TEST_2.fna
test-data/TEST_2/TEST_2.gbff
test-data/TEST_2/TEST_2.gff3
test-data/TEST_2/TEST_2.hypotheticals.faa
test-data/TEST_2/TEST_2.hypotheticals.tsv
test-data/TEST_2/TEST_2.json
test-data/TEST_2/TEST_2.log
test-data/TEST_2/TEST_2.tsv
test-data/TEST_2/TEST_2.txt
test-data/TEST_3/TEST_3.embl
test-data/TEST_3/TEST_3.faa
test-data/TEST_3/TEST_3.ffn
test-data/TEST_3/TEST_3.fna
test-data/TEST_3/TEST_3.gbff
test-data/TEST_3/TEST_3.gff3
test-data/TEST_3/TEST_3.json
test-data/TEST_3/TEST_3.log
test-data/TEST_3/TEST_3.tsv
test-data/TEST_4/TEST_4.embl
test-data/TEST_4/TEST_4.faa
test-data/TEST_4/TEST_4.ffn
test-data/TEST_4/TEST_4.fna
test-data/TEST_4/TEST_4.gbff
test-data/TEST_4/TEST_4.gff3
test-data/TEST_4/TEST_4.hypotheticals.faa
test-data/TEST_4/TEST_4.hypotheticals.tsv
test-data/TEST_4/TEST_4.json
test-data/TEST_4/TEST_4.log
test-data/TEST_4/TEST_4.tsv
test-data/TEST_4/TEST_4.txt
test-data/TEST_5/TEST_5.log
test-data/TEST_5/TEST_5.txt
test-data/prodigal.tf
test-data/replicons.tsv
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3f
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3i
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3m
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3p
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-mutation.tab
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-suppress
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-susceptible.tab
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pdb
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.phr
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pin
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.psq
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.ptf
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pto
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ndb
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nhr
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nin
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.not
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nsq
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ntf
test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nto
test-data/test-db/amrfinderplus-db/2021-09-30.1/database_format_version.txt
test-data/test-db/amrfinderplus-db/2021-09-30.1/fam.tab
test-data/test-db/amrfinderplus-db/2021-09-30.1/taxgroup.tab
test-data/test-db/amrfinderplus-db/2021-09-30.1/version.txt
test-data/test-db/antifam.h3f
test-data/test-db/antifam.h3i
test-data/test-db/antifam.h3m
test-data/test-db/antifam.h3p
test-data/test-db/bakta.db
test-data/test-db/expert-protein-sequences.dmnd
test-data/test-db/ncRNA-genes.i1f
test-data/test-db/ncRNA-genes.i1i
test-data/test-db/ncRNA-genes.i1m
test-data/test-db/ncRNA-genes.i1p
test-data/test-db/ncRNA-regions.i1f
test-data/test-db/ncRNA-regions.i1i
test-data/test-db/ncRNA-regions.i1m
test-data/test-db/ncRNA-regions.i1p
test-data/test-db/oric.fna
test-data/test-db/orit.fna
test-data/test-db/pfam.h3f
test-data/test-db/pfam.h3i
test-data/test-db/pfam.h3m
test-data/test-db/pfam.h3p
test-data/test-db/psc.dmnd
test-data/test-db/rRNA.i1f
test-data/test-db/rRNA.i1i
test-data/test-db/rRNA.i1m
test-data/test-db/rRNA.i1p
test-data/test-db/rfam-go.tsv
test-data/test-db/sorf.dmnd
test-data/test-db/version.json
test-data/test_database.loc
test-data/tmp/NC_002127.1.fna
test-data/tmp/TEST_1/TEST_1.embl
test-data/tmp/TEST_1/TEST_1.faa
test-data/tmp/TEST_1/TEST_1.ffn
test-data/tmp/TEST_1/TEST_1.fna
test-data/tmp/TEST_1/TEST_1.gbff
test-data/tmp/TEST_1/TEST_1.gff3
test-data/tmp/TEST_1/TEST_1.hypotheticals.faa
test-data/tmp/TEST_1/TEST_1.hypotheticals.tsv
test-data/tmp/TEST_1/TEST_1.json
test-data/tmp/TEST_1/TEST_1.log
test-data/tmp/TEST_1/TEST_1.tsv
test-data/tmp/TEST_1/TEST_1.txt
test-data/tmp/TEST_2/TEST_2.embl
test-data/tmp/TEST_2/TEST_2.faa
test-data/tmp/TEST_2/TEST_2.ffn
test-data/tmp/TEST_2/TEST_2.fna
test-data/tmp/TEST_2/TEST_2.gbff
test-data/tmp/TEST_2/TEST_2.gff3
test-data/tmp/TEST_2/TEST_2.hypotheticals.faa
test-data/tmp/TEST_2/TEST_2.hypotheticals.tsv
test-data/tmp/TEST_2/TEST_2.json
test-data/tmp/TEST_2/TEST_2.log
test-data/tmp/TEST_2/TEST_2.tsv
test-data/tmp/TEST_2/TEST_2.txt
test-data/tmp/TEST_3/TEST_3.embl
test-data/tmp/TEST_3/TEST_3.faa
test-data/tmp/TEST_3/TEST_3.ffn
test-data/tmp/TEST_3/TEST_3.fna
test-data/tmp/TEST_3/TEST_3.gbff
test-data/tmp/TEST_3/TEST_3.gff3
test-data/tmp/TEST_3/TEST_3.json
test-data/tmp/TEST_3/TEST_3.log
test-data/tmp/TEST_3/TEST_3.tsv
test-data/tmp/TEST_3/TEST_3.txt
test-data/tmp/TEST_4/TEST_4.embl
test-data/tmp/TEST_4/TEST_4.faa
test-data/tmp/TEST_4/TEST_4.ffn
test-data/tmp/TEST_4/TEST_4.fna
test-data/tmp/TEST_4/TEST_4.gbff
test-data/tmp/TEST_4/TEST_4.gff3
test-data/tmp/TEST_4/TEST_4.hypotheticals.faa
test-data/tmp/TEST_4/TEST_4.hypotheticals.tsv
test-data/tmp/TEST_4/TEST_4.json
test-data/tmp/TEST_4/TEST_4.log
test-data/tmp/TEST_4/TEST_4.tsv
test-data/tmp/TEST_4/TEST_4.txt
test-data/tmp/TEST_5/TEST_5.log
test-data/tmp/TEST_5/TEST_5.txt
test-data/tmp/prodigal.tf
test-data/tmp/replicons.tsv
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3f
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3i
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3m
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3p
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-mutation.tab
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-suppress
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-susceptible.tab
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pdb
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.phr
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pin
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.psq
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.ptf
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pto
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ndb
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nhr
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nin
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.not
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nsq
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ntf
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nto
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/database_format_version.txt
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/fam.tab
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/taxgroup.tab
test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/version.txt
test-data/tmp/test-db/antifam.h3f
test-data/tmp/test-db/antifam.h3i
test-data/tmp/test-db/antifam.h3m
test-data/tmp/test-db/antifam.h3p
test-data/tmp/test-db/bakta.db
test-data/tmp/test-db/expert-protein-sequences.dmnd
test-data/tmp/test-db/ncRNA-genes.i1f
test-data/tmp/test-db/ncRNA-genes.i1i
test-data/tmp/test-db/ncRNA-genes.i1m
test-data/tmp/test-db/ncRNA-genes.i1p
test-data/tmp/test-db/ncRNA-regions.i1f
test-data/tmp/test-db/ncRNA-regions.i1i
test-data/tmp/test-db/ncRNA-regions.i1m
test-data/tmp/test-db/ncRNA-regions.i1p
test-data/tmp/test-db/oric.fna
test-data/tmp/test-db/orit.fna
test-data/tmp/test-db/pfam.h3f
test-data/tmp/test-db/pfam.h3i
test-data/tmp/test-db/pfam.h3m
test-data/tmp/test-db/pfam.h3p
test-data/tmp/test-db/psc.dmnd
test-data/tmp/test-db/rRNA.i1f
test-data/tmp/test-db/rRNA.i1i
test-data/tmp/test-db/rRNA.i1m
test-data/tmp/test-db/rRNA.i1p
test-data/tmp/test-db/rfam-go.tsv
test-data/tmp/test-db/sorf.dmnd
test-data/tmp/test-db/version.json
test-data/tmp/test_database.loc
test-data/tmp/user-proteins.faa
test-data/user-proteins.faa
tool-data/bakta_database.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 1a27ad3d0cdf bakta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bakta.xml Thu Sep 01 17:28:43 2022 +0000
[
b'@@ -0,0 +1,507 @@\n+<tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+    <description>\n+        genome annotation via alignment-free sequence identification\n+    </description>\n+    <macros>\n+        <import>macro.xml</import>\n+    </macros>\n+    <expand macro=\'edam\'/>\n+    <expand macro=\'xrefs\'/>\n+    <expand macro="requirements"/>\n+    <expand macro="version_command"/>\n+\n+    <command detect_errors="aggressive"><![CDATA[\n+        bakta\n+        #*======================================\n+                    CPU option\n+        ======================================*#\n+        --threads \\${GALAXY_SLOTS:-1}\n+        #*======================================\n+                    Bakta database\n+        ======================================*#\n+        --db $input_option.db_select.fields.path\n+        #if $input_option.min_contig_length\n+            --min-contig-length $input_option.min_contig_length\n+        #else if $annotation.compliant\n+            --min-contig-length 200\n+        #else\n+            --min-contig-length 1\n+        #end if\n+        --prefix bakta_output\n+        #*======================================\n+                  Organism options\n+              genus/species/strain/plasmid\n+        ======================================*#\n+        #if $organism.genus\n+            --genus \'$organism.genus\'\n+        #end if\n+        #if $organism.species\n+            --species \'$organism.species\'\n+        #end if\n+        #if $organism.strain\n+            --strain \'$organism.strain\'\n+        #end if\n+        #if $organism.plasmid\n+            --plasmid \'$organism.plasmid\'\n+        #end if\n+        #*======================================\n+                    Annotation options\n+            gram type, prodigal/protein file\n+        ======================================*#\n+        $annotation.complete\n+        #if $annotation.prodigal\n+            
--prodigal-tf \'$annotation.prodigal\'\n+        #end if\n+        #if $annotation.translation_table\n+            --translation-table \'$annotation.translation_table\'\n+        #end if\n+        #if $annotation.gram\n+            --gram \'$annotation.gram\'\n+        #end if\n+        $annotation.keep_contig_headers\n+        #if $annotation.replicons\n+            --replicons \'$annotation.replicons\'\n+        #end if\n+        $annotation.compliant\n+        #if $annotation.proteins\n+            --proteins \'$annotation.proteins\'\n+        #end if\n+        #*======================================\n+                    Workflow OPTIONS\n+         skip some step of the bakta analysis\n+        ======================================*#\n+\n+        #if "skip_trna" in $workflow.skip_analysis\n+            --skip-trna\n+        #end if\n+        #if "skip_tmrna" in $workflow.skip_analysis\n+            --skip-tmrna\n+        #end if\n+        #if "skip_rrna" in $workflow.skip_analysis\n+            --skip-rrna\n+        #end if\n+        #if "skip_ncrna" in $workflow.skip_analysis\n+            --skip-ncrna\n+        #end if\n+        #if "skip_ncrna_region" in $workflow.skip_analysis\n+            --skip-ncrna-region\n+        #end if\n+        #if "skip_crispr" in $workflow.skip_analysis\n+            --skip-crispr\n+        #end if\n+        #if "skip_cds" in $workflow.skip_analysis\n+            --skip-cds\n+        #end if\n+        #if "skip_sorf" in $workflow.skip_analysis\n+            --skip-sorf\n+        #end if\n+        #if "skip_gap" in $workflow.skip_analysis\n+            --skip-gap\n+        #end if\n+        #if "skip_ori" in $workflow.skip_analysis\n+            --skip-ori\n+        #end if\n+\n+        #*======================================\n+                    Genome file\n+        ======================================*#\n+        \'$input_option.input_file\'\n+        #*======================================\n+                    LOG 
file\n+        ======================================*#\n+        | tee \'$logfile\'\n+        ]]></command>\n+    <inputs>\n+      <!-- DB and fil'..b'losely related but distinct protein families.\n+          This is achieved via an alignment-free sequence identification (AFSI) approach\n+          using full-length MD5 protein sequence hash digests.\n+        *Small proteins/short open reading frames*\n+          Bakta detects and annotates small proteins/short open reading frames (sORF).\n+\n+        *Expert annotation systems*\n+          To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes,\n+          Bakta includes & merges different expert annotation systems.\n+          Currently, Bakta uses NCBI\'s AMRFinderPlus for AMR gene annotations\n+          as well as a generalized protein sequence expert system with distinct\n+          coverage, identity and priority values for each sequence, currently comprising the VFDB as well as NCBI\'s BlastRules.\n+\n+        *Comprehensive workflow*\n+          Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT\n+          and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS.\n+\n+        *GFF3 & INSDC conform annotations*\n+          Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission\n+          (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats,\n+          respectively for representative genomes of all ESKAPE species).\n+\n+        *Bacteria & plasmids*\n+          Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only.\n+\n+        **Input options**\n+          1. Choose a genome or assembly in fasta format to use bakta annotations\n+          2. 
Choose a version of the Bakta database\n+\n+        **Organism options**\n+        You can specify information about the analysed fasta as text input for:\n+        - genus\n+        - species\n+        - strain\n+        - plasmid\n+\n+        **Annotation options**\n+        1. You can specify if all sequences (chromosome or plasmids) are complete or not\n+        2. You can add your own prodigal training file for CDS prediction\n+        3. The translation table could be modified, default is the 11th for bacteria\n+        4. You can specify if bacteria is gram -/+ or unknown (default value: unknown)\n+        5. You can keep the name of contig present in the input file\n+        6. You can specify your own replicon table as a TSV/CSV file\n+        7. The compliance option is for ready to submit annotation file to Public database\n+        as ENA, Genbank EMBL\n+        8. You can specify a protein sequence file for annotation in GenBank or fasta formats\n+        Using the Fasta format, each reference sequence can be provided in a short or long format:\n+\n+        # short:\n+        >id gene~~~product~~~dbxrefs\n+        MAQ...\n+\n+        # long:\n+        >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs\n+        MAQ...\n+\n+        **Skip steps**\n+        Some steps can be skipped:\n+        - skip-trna           Skip tRNA detection & annotation\n+        - skip-tmrna          Skip tmRNA detection & annotation\n+        - skip-rrna           Skip rRNA detection & annotation\n+        - skip-ncrna          Skip ncRNA detection & annotation\n+        - skip-ncrna-region   Skip ncRNA region detection & annotation\n+        - skip-crispr         Skip CRISPR array detection & annotation\n+        - skip-cds            Skip CDS detection & annotation\n+        - skip-pseudo         Skip pseudogene detection & annotation\n+        - skip-sorf           Skip sORF detection & annotation\n+        - skip-gap            Skip gap 
detection & annotation\n+        - skip-ori            Skip oriC/oriT detection & annotation\n+\n+        **Output options**\n+        Bakta produces a number of output files; you can select which types of file you want:\n+        - Summary of the annotation\n+        - Annotated files\n+        - Sequence files for nucleotide and/or amino acid\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 1a27ad3d0cdf macro.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macro.xml Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,28 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.4.2</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <xml name="version_command">
+        <version_command><![CDATA[bakta --version]]></version_command>
+    </xml>
+    <xml name="edam">
+      <edam_topics>
+          <edam_topic>topic_3174</edam_topic>
+      </edam_topics>
+    </xml>
+    <xml name="xrefs">
+        <xrefs>
+            <xref type='bio.tools'>Bakta</xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">bakta</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1099/mgen.0.000685</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/NC_002127.1.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NC_002127.1.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,20 @@
+>NC_002127.1 Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTT
+TCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCA
+TCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGT
+CTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGC
+TTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTA
+TTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAG
+GAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTA
+AGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAA
+ACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTAT
+CACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTA
+TGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGT
+TCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGA
+ATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTT
+TAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTA
+ACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATT
+AAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,87 @@
+ID   contig_1; ; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   contig_1;
+XX
+DE   plasmid unnamed1, complete sequence
+XX
+OS   .
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 13:06:54
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     2
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /plasmid="unnamed1"
+FT   gene            413..736
+FT                   /locus_tag="IHHALP_00005"
+FT   CDS             413..736
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00005"
+FT                   /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+FT                   AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+FT                   MAD"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00005"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+FT   gene            complement(join(971..1330,1..141))
+FT                   /locus_tag="IHHALP_00010"
+FT   CDS             complement(join(971..1330,1..141))
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00010"
+FT                   /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+FT                   EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+FT                   YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+FT                   IPI"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00010"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.ffn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.ffn Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG
+>IHHALP_00010 hypothetical protein
+ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.fna Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,24 @@
+>contig_1 [completeness=complete] [topology=circular] [gcode=11]
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,83 @@
+LOCUS       contig_1                1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  plasmid unnamed1, complete sequence.
+ACCESSION   contig_1
+VERSION     contig_1
+KEYWORDS    .
+SOURCE      None
+  ORGANISM  .
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 13:06:54
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     2
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /plasmid="unnamed1"
+     gene            413..736
+                     /locus_tag="IHHALP_00005"
+     CDS             413..736
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00005"
+                     /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+                     AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+                     MAD"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00005"
+                     /inference="ab initio prediction:Prodigal:2.6"
+     gene            complement(join(971..1330,1..141))
+                     /locus_tag="IHHALP_00010"
+     CDS             complement(join(971..1330,1..141))
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00010"
+                     /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+                     EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+                     YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+                     IPI"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00010"
+                     /inference="ab initio prediction:Prodigal:2.6"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,36 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region contig_1 1 1330
+contig_1 Bakta region 1 1330 . + . ID=contig_1;Name=contig_1;Is_circular=true
+contig_1 Prodigal CDS 413 736 . + 0 ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein
+contig_1 Prodigal CDS 971 1471 . - 0 ID=IHHALP_00010;Name=hypothetical protein;locus_tag=IHHALP_00010;product=hypothetical protein
+##FASTA
+>contig_1
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.hypotheticals.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.hypotheticals.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.hypotheticals.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.hypotheticals.tsv Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,5 @@
+#Annotated with Bakta v1.4.2, https://github.com/oschwengers/bakta
+#Database v3.0, https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
+contig_1 413 736 + IHHALP_00005 12.1 10.4
+contig_1 971 141 - IHHALP_00010 18.9 7.7
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,90 @@
+{
+    "genome": {
+        "genus": null,
+        "species": null,
+        "strain": null,
+        "complete": true,
+        "gram": "?",
+        "translation_table": 11
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.6203007518796992
+    },
+    "features": [
+        {
+            "type": "cds",
+            "contig": "contig_1",
+            "start": 413,
+            "stop": 736,
+            "strand": "+",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 2,
+            "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
+            "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
+            "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 12072.90819999999,
+                "isoelectric_point": 10.367886161804197
+            },
+            "id": "IHHALPPJCH_1",
+            "locus": "IHHALP_00005"
+        },
+        {
+            "type": "cds",
+            "contig": "contig_1",
+            "start": 971,
+            "stop": 141,
+            "strand": "-",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "AGGA/GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 1,
+            "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
+            "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
+            "edge": true,
+            "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 18866.325799999995,
+                "isoelectric_point": 7.696590614318848
+            },
+            "id": "IHHALPPJCH_2",
+            "locus": "IHHALP_00010"
+        }
+    ],
+    "sequences": [
+        {
+            "id": "contig_1",
+            "description": "[completeness=complete] [topology=circular] [gcode=11]",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "simple_id": "contig_1",
+            "orig_id": "NC_002127.1",
+            "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "name": "unnamed1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 13:06:53",
+        "end": "2022-08-22 13:06:54"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,90 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+predict tRNAs...
+ found: 0
+predict tmRNAs...
+ found: 0
+predict rRNAs...
+ found: 0
+predict ncRNAs...
+ found: 0
+predict ncRNA regions...
+ found: 0
+predict CRISPR arrays...
+ found: 0
+predict & annotate CDSs...
+ predicted: 2 
+ discarded spurious: 0
+ revised translational exceptions: 0
+ detected IPSs: 0
+ found PSCs: 0
+ found PSCCs: 0
+ lookup annotations...
+ conduct expert systems...
+ amrfinder: 0
+ protein sequences: 0
+ combine annotations and mark hypotheticals...
+ analyze hypothetical proteins: 2
+ detected Pfam hits: 0 
+ calculated proteins statistics
+ revise special cases...
+extract sORF...
+ potential: 22
+ discarded due to overlaps: 2
+ discarded spurious: 0
+ detected IPSs: 0
+ found PSCs: 0
+ lookup annotations...
+ filter and combine annotations...
+ filtered sORFs: 0
+detect gaps...
+ found: 0
+detect oriCs/oriVs...
+ found: 0
+detect oriTs...
+ found: 0
+apply feature overlap filters...
+select features and create locus tags...
+selected: 2
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 62.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 2
+   hypotheticals: 2
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpb092rhfs/job_working_directory/000/2/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ hypothetical TSV...
+ translated hypothetical CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:01 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
+contig_1 cds 413 736 + IHHALP_00005 hypothetical protein
+contig_1 cds 971 141 - IHHALP_00010 hypothetical protein
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_1/TEST_1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 62.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 2
+hypotheticals: 2
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,89 @@
+ID   NC_002127; SV 1; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   NC_002127;
+XX
+DE   Escherichia coli o157:h7 Sakai plasmid pOSAK1, complete sequence
+XX
+OS   Escherichia coli o157:h7 Sakai
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 13:07:08
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     2
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /organism="Escherichia coli o157:h7 Sakai"
+FT                   /strain="Sakai"
+FT                   /plasmid="pOSAK1"
+FT   gene            413..736
+FT                   /locus_tag="IHHALP_00005"
+FT   CDS             413..736
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00005"
+FT                   /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+FT                   AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+FT                   MAD"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00005"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+FT   gene            complement(join(971..1330,1..141))
+FT                   /locus_tag="IHHALP_00010"
+FT   CDS             complement(join(971..1330,1..141))
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00010"
+FT                   /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+FT                   EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+FT                   YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+FT                   IPI"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00010"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.ffn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.ffn Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG
+>IHHALP_00010 hypothetical protein
+ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,24 @@
+>NC_002127.1 Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,85 @@
+LOCUS       NC_002127.1             1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  Escherichia coli o157:h7 Sakai plasmid pOSAK1, complete sequence.
+ACCESSION   NC_002127
+VERSION     NC_002127.1
+KEYWORDS    .
+SOURCE      Escherichia coli o157:h7 Sakai
+  ORGANISM  Escherichia coli o157:h7 Sakai
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 13:07:08
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     2
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /organism="Escherichia coli o157:h7 Sakai"
+                     /strain="Sakai"
+                     /plasmid="pOSAK1"
+     gene            413..736
+                     /locus_tag="IHHALP_00005"
+     CDS             413..736
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00005"
+                     /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+                     AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+                     MAD"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00005"
+                     /inference="ab initio prediction:Prodigal:2.6"
+     gene            complement(join(971..1330,1..141))
+                     /locus_tag="IHHALP_00010"
+     CDS             complement(join(971..1330,1..141))
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00010"
+                     /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+                     EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+                     YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+                     IPI"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00010"
+                     /inference="ab initio prediction:Prodigal:2.6"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,37 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# organism Escherichia coli o157:h7 Sakai
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region NC_002127.1 1 1330
+NC_002127.1 Bakta region 1 1330 . + . ID=NC_002127.1;Name=NC_002127.1;Is_circular=true
+NC_002127.1 Prodigal CDS 413 736 . + 0 ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein
+NC_002127.1 Prodigal CDS 971 1471 . - 0 ID=IHHALP_00010;Name=hypothetical protein;locus_tag=IHHALP_00010;product=hypothetical protein
+##FASTA
+>NC_002127.1
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.hypotheticals.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.hypotheticals.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.hypotheticals.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.hypotheticals.tsv Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,5 @@
+#Annotated with Bakta v1.4.2, https://github.com/oschwengers/bakta
+#Database v3.0, https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
+NC_002127.1 413 736 + IHHALP_00005 12.1 10.4
+NC_002127.1 971 141 - IHHALP_00010 18.9 7.7
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,89 @@
+{
+    "genome": {
+        "genus": "Escherichia",
+        "species": "coli o157:h7",
+        "strain": "Sakai",
+        "plasmid": "pOSAK1",
+        "complete": true,
+        "gram": "?",
+        "translation_table": 11
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.6203007518796992
+    },
+    "features": [
+        {
+            "type": "cds",
+            "contig": "NC_002127.1",
+            "start": 413,
+            "stop": 736,
+            "strand": "+",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 2,
+            "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
+            "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
+            "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 12072.90819999999,
+                "isoelectric_point": 10.367886161804197
+            },
+            "id": "IHHALPPJCH_1",
+            "locus": "IHHALP_00005"
+        },
+        {
+            "type": "cds",
+            "contig": "NC_002127.1",
+            "start": 971,
+            "stop": 141,
+            "strand": "-",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "AGGA/GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 1,
+            "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
+            "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
+            "edge": true,
+            "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 18866.325799999995,
+                "isoelectric_point": 7.696590614318848
+            },
+            "id": "IHHALPPJCH_2",
+            "locus": "IHHALP_00010"
+        }
+    ],
+    "sequences": [
+        {
+            "id": "NC_002127.1",
+            "description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "simple_id": "contig_1",
+            "name": "pOSAK1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 13:07:07",
+        "end": "2022-08-22 13:07:08"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,88 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+skip tRNA prediction...
+skip tmRNA prediction...
+predict rRNAs...
+ found: 0
+predict ncRNAs...
+ found: 0
+predict ncRNA regions...
+ found: 0
+predict CRISPR arrays...
+ found: 0
+predict & annotate CDSs...
+ predicted: 2 
+ discarded spurious: 0
+ revised translational exceptions: 0
+ detected IPSs: 0
+ found PSCs: 0
+ found PSCCs: 0
+ lookup annotations...
+ conduct expert systems...
+ amrfinder: 0
+ protein sequences: 0
+ combine annotations and mark hypotheticals...
+ analyze hypothetical proteins: 2
+ detected Pfam hits: 0 
+ calculated proteins statistics
+ revise special cases...
+extract sORF...
+ potential: 22
+ discarded due to overlaps: 2
+ discarded spurious: 0
+ detected IPSs: 0
+ found PSCs: 0
+ lookup annotations...
+ filter and combine annotations...
+ filtered sORFs: 0
+detect gaps...
+ found: 0
+detect oriCs/oriVs...
+ found: 0
+detect oriTs...
+ found: 0
+apply feature overlap filters...
+select features and create locus tags...
+selected: 2
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 62.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 2
+   hypotheticals: 2
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpb092rhfs/job_working_directory/000/4/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ hypothetical TSV...
+ translated hypothetical CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:00 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
+NC_002127.1 cds 413 736 + IHHALP_00005 hypothetical protein
+NC_002127.1 cds 971 141 - IHHALP_00010 hypothetical protein
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_2/TEST_2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 62.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 2
+hypotheticals: 2
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_3/TEST_3.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_3/TEST_3.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,62 @@
+ID   contig_1; ; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   contig_1;
+XX
+DE   plasmid unnamed1, complete sequence
+XX
+OS   .
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 13:07:22
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     0
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /plasmid="unnamed1"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_3/TEST_3.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_3/TEST_3.fna Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,24 @@
+>contig_1 [completeness=complete] [topology=circular] [gcode=11]
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_3/TEST_3.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_3/TEST_3.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,58 @@
+LOCUS       contig_1                1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  plasmid unnamed1, complete sequence.
+ACCESSION   contig_1
+VERSION     contig_1
+KEYWORDS    .
+SOURCE      None
+  ORGANISM  .
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 13:07:22
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     0
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /plasmid="unnamed1"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_3/TEST_3.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_3/TEST_3.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,34 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region contig_1 1 1330
+contig_1 Bakta region 1 1330 . + . ID=contig_1;Name=contig_1;Is_circular=true
+##FASTA
+>contig_1
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_3/TEST_3.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_3/TEST_3.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,42 @@
+{
+    "genome": {
+        "genus": null,
+        "species": null,
+        "strain": null,
+        "complete": true,
+        "gram": "?",
+        "translation_table": 11
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.0
+    },
+    "features": [],
+    "sequences": [
+        {
+            "id": "contig_1",
+            "description": "[completeness=complete] [topology=circular] [gcode=11]",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "simple_id": "contig_1",
+            "orig_id": "NC_002127.1",
+            "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "name": "unnamed1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 13:07:21",
+        "end": "2022-08-22 13:07:22"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_3/TEST_3.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_3/TEST_3.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,55 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+skip tRNA prediction...
+skip tmRNA prediction...
+skip rRNA prediction...
+skip ncRNA prediction...
+skip ncRNA region prediction...
+skip CRISPR array prediction...
+skip CDS prediction...
+skip sORF prediction...
+skip gap annotation...
+skip oriC/T annotation...
+apply feature overlap filters...
+select features and create locus tags...
+selected: 0
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 0.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 0
+   hypotheticals: 0
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpb092rhfs/job_working_directory/000/6/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:00 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_3/TEST_3.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_3/TEST_3.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,3 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,87 @@
+ID   p2; ; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   p2;
+XX
+DE   plasmid pOSAK1, complete sequence
+XX
+OS   .
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 13:08:00
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     2
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /plasmid="pOSAK1"
+FT   gene            413..736
+FT                   /locus_tag="IHHALP_00005"
+FT   CDS             413..736
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00005"
+FT                   /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+FT                   AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+FT                   MAD"
+FT                   /codon_start=1
+FT                   /transl_table=4
+FT                   /protein_id="gnl|Bakta|IHHALP_00005"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+FT   gene            complement(join(971..1330,1..141))
+FT                   /locus_tag="IHHALP_00010"
+FT   CDS             complement(join(971..1330,1..141))
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00010"
+FT                   /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+FT                   EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+FT                   YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+FT                   IPI"
+FT                   /codon_start=1
+FT                   /transl_table=4
+FT                   /protein_id="gnl|Bakta|IHHALP_00010"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.ffn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.ffn Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG
+>IHHALP_00010 hypothetical protein
+ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.fna Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,24 @@
+>p2 [completeness=complete] [topology=circular] [gcode=4] [plasmid-name=pOSAK1]
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,83 @@
+LOCUS       p2                      1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  plasmid pOSAK1, complete sequence.
+ACCESSION   p2
+VERSION     p2
+KEYWORDS    .
+SOURCE      None
+  ORGANISM  .
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 13:08:00
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     2
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /plasmid="pOSAK1"
+     gene            413..736
+                     /locus_tag="IHHALP_00005"
+     CDS             413..736
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00005"
+                     /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+                     AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+                     MAD"
+                     /codon_start=1
+                     /transl_table=4
+                     /protein_id="gnl|Bakta|IHHALP_00005"
+                     /inference="ab initio prediction:Prodigal:2.6"
+     gene            complement(join(971..1330,1..141))
+                     /locus_tag="IHHALP_00010"
+     CDS             complement(join(971..1330,1..141))
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00010"
+                     /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+                     EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+                     YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+                     IPI"
+                     /codon_start=1
+                     /transl_table=4
+                     /protein_id="gnl|Bakta|IHHALP_00010"
+                     /inference="ab initio prediction:Prodigal:2.6"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,13 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region p2 1 1330
+p2 Bakta region 1 1330 . + . ID=p2;Name=p2;Is_circular=true
+p2 Prodigal gene 413 736 . + . ID=IHHALP_00005_gene;locus_tag=IHHALP_00005
+p2 Prodigal CDS 413 736 . + 0 ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein;Parent=IHHALP_00005_gene;inference=ab initio prediction:Prodigal:2.6
+p2 Prodigal gene 971 1471 . - . ID=IHHALP_00010_gene;locus_tag=IHHALP_00010
+p2 Prodigal CDS 971 1471 . - 0 ID=IHHALP_00010;Name=hypothetical protein;locus_tag=IHHALP_00010;product=hypothetical protein;Parent=IHHALP_00010_gene;inference=ab initio prediction:Prodigal:2.6
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.hypotheticals.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.hypotheticals.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.hypotheticals.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.hypotheticals.tsv Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,5 @@
+#Annotated with Bakta v1.4.2, https://github.com/oschwengers/bakta
+#Database v3.0, https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
+p2 413 736 + IHHALP_00005 12.1 10.4
+p2 971 141 - IHHALP_00010 18.9 7.7
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,89 @@
+{
+    "genome": {
+        "genus": null,
+        "species": null,
+        "strain": null,
+        "complete": true,
+        "gram": "?",
+        "translation_table": 4
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.6203007518796992
+    },
+    "features": [
+        {
+            "type": "cds",
+            "contig": "p2",
+            "start": 413,
+            "stop": 736,
+            "strand": "+",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 2,
+            "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
+            "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
+            "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 12072.90819999999,
+                "isoelectric_point": 10.367886161804197
+            },
+            "id": "IHHALPPJCH_1",
+            "locus": "IHHALP_00005"
+        },
+        {
+            "type": "cds",
+            "contig": "p2",
+            "start": 971,
+            "stop": 141,
+            "strand": "-",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "AGGA/GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 1,
+            "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
+            "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
+            "edge": true,
+            "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 18866.325799999995,
+                "isoelectric_point": 7.696590614318848
+            },
+            "id": "IHHALPPJCH_2",
+            "locus": "IHHALP_00010"
+        }
+    ],
+    "sequences": [
+        {
+            "id": "p2",
+            "description": "[completeness=complete] [topology=circular] [gcode=4] [plasmid-name=pOSAK1]",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "orig_id": "NC_002127.1",
+            "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "name": "pOSAK1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 13:07:59",
+        "end": "2022-08-22 13:08:00"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,91 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+predict tRNAs...
+ found: 0
+predict tmRNAs...
+ found: 0
+predict rRNAs...
+ found: 0
+predict ncRNAs...
+ found: 0
+predict ncRNA regions...
+ found: 0
+predict CRISPR arrays...
+ found: 0
+predict & annotate CDSs...
+ predicted: 2 
+ discarded spurious: 0
+ revised translational exceptions: 0
+ detected IPSs: 0
+ found PSCs: 0
+ found PSCCs: 0
+ lookup annotations...
+ conduct expert systems...
+ amrfinder: 0
+ protein sequences: 0
+ user protein sequences: 0
+ combine annotations and mark hypotheticals...
+ analyze hypothetical proteins: 2
+ detected Pfam hits: 0 
+ calculated proteins statistics
+ revise special cases...
+extract sORF...
+ potential: 16
+ discarded due to overlaps: 2
+ discarded spurious: 0
+ detected IPSs: 0
+ found PSCs: 0
+ lookup annotations...
+ filter and combine annotations...
+ filtered sORFs: 0
+detect gaps...
+ found: 0
+detect oriCs/oriVs...
+ found: 0
+detect oriTs...
+ found: 0
+apply feature overlap filters...
+select features and create locus tags...
+selected: 2
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 62.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 2
+   hypotheticals: 2
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpb092rhfs/job_working_directory/000/12/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ hypothetical TSV...
+ translated hypothetical CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:01 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
+p2 cds 413 736 + IHHALP_00005 hypothetical protein
+p2 cds 971 141 - IHHALP_00010 hypothetical protein
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_4/TEST_4.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 62.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 2
+hypotheticals: 2
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_5/TEST_5.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_5/TEST_5.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,55 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+skip tRNA prediction...
+skip tmRNA prediction...
+skip rRNA prediction...
+skip ncRNA prediction...
+skip ncRNA region prediction...
+skip CRISPR array prediction...
+skip CDS prediction...
+skip sORF prediction...
+skip gap annotation...
+skip oriC/T annotation...
+apply feature overlap filters...
+select features and create locus tags...
+selected: 0
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 0.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 0
+   hypotheticals: 0
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpb092rhfs/job_working_directory/000/14/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:00 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/TEST_5/TEST_5.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_5/TEST_5.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 0.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 0
+hypotheticals: 0
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/prodigal.tf
b
Binary file test-data/prodigal.tf has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/replicons.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/replicons.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,3 @@
+NC_002695.2 c1 c c -
+NC_002128.1 p1 plasmid c pO157
+NC_002127.1 p2 p c pOSAK1
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3f
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3i
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3m
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3p
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-mutation.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-mutation.tab Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup accession_version mutation_position mutation_symbol class subclass mutated_protein_name
+Escherichia WP_000019358.1 12 soxS_A12S MULTIDRUG AMPICILLIN/CHLORAMPHENICOL/QUINOLONE/RIFAMPIN/TETRACYCLINE Escherichia_ampicillin/chloramphenicol/quinolone/rifampin/tetracycline_resistant_SoxS
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-suppress
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-suppress Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup protein_accession protein_gi
+Escherichia AAA21095.1 151858
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-susceptible.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-susceptible.tab Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup gene_symbol accession_version resistance_cutoff class subclass resistance_protein_name
+Streptococcus_pneumoniae pbp1a WP_001040013.1            99.000000 BETA-LACTAM BETA-LACTAM Streptococcus_pneumoniae_beta-lactam_resistant_PBP1A
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pdb
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pdb has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.phr
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.phr has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pin
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pin has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.psq
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.psq has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.ptf
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.ptf has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pto
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pto has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ndb
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ndb has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nhr
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nhr has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nin
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nin has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.not
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.not has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nsq
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nsq has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ntf
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ntf has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nto
b
Binary file test-data/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nto has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/database_format_version.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/amrfinderplus-db/2021-09-30.1/database_format_version.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,1 @@
+3.10.16
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/fam.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/amrfinderplus-db/2021-09-30.1/fam.tab Thu Sep 01 17:28:43 2022 +0000
b
b'@@ -0,0 +1,1744 @@\n+#node_id\tparent_node_id\tgene_symbol\thmm_id\thmm_tc1\thmm_tc2\tblastrule_complete_ident\tblastrule_complete_wp_coverage\tblastrule_complete_br_coverage\tblastrule_partial_ident\tblastrule_partial_wp_coverage\tblastrule_partial_br_coverage\treportable\ttype\tsubtype\tclass\tsubclass\tfamily_name\n+ACID\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tACID\t\t\t\n+ALL\t\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\t\t\t\t\t\n+AME\tAMR\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\taminoglycoside modifying enzymes\n+AMR\tALL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\t\n+BIOCIDE\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tBIOCIDE\t\t\t\n+BcII\tbla-B1\tbla2\tNF033095.1\t500.00\t500.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tBETA-LACTAM\tCARBAPENEM\tBcII family subclass B1 metallo-beta-lactamase\n+CDF_efflux\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tCDF family cation efflux transporter\n+CMY2-MIR-ACT-EC\tbla-C\tampC\tNF012173.1\t680.00\t680.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tBETA-LACTAM\tBETA-LACTAM\tCMY2/MIR/ACT/EC family class C beta-lactamase\n+EFFLUX\tAMR\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tefflux\n+HARLDQ_not_B3\tbla-B3\t-\tNF000405.1\t350.00\t350.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tHARLDQ motif MBL-fold protein\n+HEAT\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tHEAT\t\t\t\n+HTH_5\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tArsR/SmtB family metalloregulatory transcriptional repressor\n+LHR_hdeD\tHEAT\thdeD-GI\t-\t0.00\t0.00\t90.00\t90.00\t90.00\t93.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance membrane protein HdeD-GI\n+LHR_hsp20A\tHEAT\thsp20\t-\t0.00\t0.00\t94.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\tsmall heat shock protein 
sHSP20\n+LHR_hsp20B\tHEAT\tshsP\t-\t0.00\t0.00\t93.00\t90.00\t90.00\t94.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\tsmall heat shock protein sHSP20-GI\n+LHR_kefB\tHEAT\tkefB-GI\t-\t0.00\t0.00\t86.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance system K+/H+ antiporter KefB-GI\n+LHR_psiE\tHEAT\tpsi-GI\t-\t0.00\t0.00\t88.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance protein PsiE-GI\n+LHR_trx\tHEAT\ttrxLHR\t-\t0.00\t0.00\t85.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance system thioredoxin Trx-GI\n+LHR_yfdX1\tHEAT\tyfdX1\t-\t0.00\t0.00\t88.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance protein YfdX1\n+LHR_yfdX2\tHEAT\tyfdX2\t-\t0.00\t0.00\t90.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance protein YfdX2\n+MATE_efflux\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug efflux MATE transporter\n+METAL-RND-IM\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tcation efflux RND transporter permease subunit\n+METAL\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tMetal Resistance\n+MFS_efflux_CHL\tMFS_efflux\tcml\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tPHENICOL\tCHLORAMPHENICOL\tchloramphenicol efflux MFS transporter\n+MFS_efflux_qac\tBIOCIDE\tqac\tNF000089.1\t900.00\t900.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tSTRESS\tBIOCIDE\tQUATERNARY AMMONIUM\tQUATERNARY AMMONIUM\tQacA/B family quaternary ammonium compound efflux MFS transporter\n+MFS_efflux\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug efflux MFS transporter\n+MerP_Gneg\tmerP\tmerP\tTIGR02052.1\t92.55\t92.55\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t1\tSTRESS\tMETAL\tMERCURY\tMERCURY\tmercury resistance system periplasmic binding protein 
MerP\n+OM_sidero\tVIRULENCE_Ecoli\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tVIRULENCE\tVIRULENCE\t\t\tTonB-dependent siderophore receptor\n+P-type_ATPase\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tmetal-translocating P-type ATPase\n+PERI-SENSOR\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tperiplasmic heavy metal sensor\n+RESPONSE_REG\tAMR\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tDNA-binding response regulator\n+RND-IM\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug efflux RND transporter permease subunit\n+RND-OM\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug'..b'ferase Vat(A)\n+vat(B)\tvat\tvat(B)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(B)\n+vat(C)\tvat\tvat(C)\tNF000097.1\t425.00\t425.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(C)\n+vat(D)\tvat\tvat(D)\tNF000111.1\t400.00\t400.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(D)\n+vat(E)\tvat\tvat(E)\tNF000020.1\t450.00\t450.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(E)\n+vat(F)\tvat\tvat(F)\tNF000147.1\t400.00\t400.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(F)\n+vat(H)\tvat\tvat(H)\tNF000504.1\t475.00\t425.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(H)\n+vat(I)\tvat\tvatI\tNF033468.1\t415.00\t415.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase 
Vat(I)\n+vat\tAMR\tvat\tNF000311.1\t300.00\t300.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tVat family streptogramin A O-acetyltransferase\n+vga(A)\tvga\tvga(A)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(A)\n+vga(B)\tvga\tvga(B)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(B)\n+vga(C)\tvga\tvga(C)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(C)\n+vga(D)\tvga\tvga(D)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(D)\n+vga(E)\tvga\tvga(E)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(E)\n+vga\tabc-f\tvga\tNF000170.1\t800.00\t800.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tVga family ABC-F type ribosomal protection protein\n+vgb(A)\tvgb\tvgb(A)\tNF000022.1\t600.00\t600.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase Vgb(A)\n+vgb(B)\tvgb\tvgb(B)\tNF000096.1\t600.00\t600.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase Vgb(B)\n+vgb(C)\tvgb\tvgbC\t-\t0.00\t0.00\t84.00\t90.00\t90.00\t88.00\t90.00\t25.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase Vgb(C)\n+vgb\tAMR\tvgb\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase\n+virF\tVIRULENCE_Ecoli\tvirF\t-\t0.00\t0.00\t94.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tAraC family invasion system transcriptional regulator 
VirF\n+vmlR\tabc-f\tvmlR\t-\t0.00\t0.00\t90.00\t90.00\t90.00\t96.00\t90.00\t25.00\t2\tAMR\tAMR\tMACROLIDE/PLEUROMUTILIN\tLINCOSAMIDE/STREPTOGRAMIN/TIAMULIN\tABC-F type ribosomal protection protein VmlR\n+vph\tAMR\tvph\tNF000088.1\t400.00\t400.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tTUBERACTINOMYCIN\tVIOMYCIN\tviomycin phosphotransferase\n+ybtP\tVIRULENCE\tybtP\t-\t0.00\t0.00\t85.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tyersiniabactin ABC transporter ATP-binding/permease protein YbtP\n+ybtQ\tVIRULENCE\tybtQ\t-\t0.00\t0.00\t85.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tyersiniabactin ABC transporter ATP-binding/permease protein YbtQ\n+yfeA\tVIRULENCE\tyfeA\t-\t0.00\t0.00\t83.00\t90.00\t90.00\t88.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tiron/manganese ABC transporter substrate-binding protein YfeA\n+yfeB\tVIRULENCE\tyfeB\t-\t0.00\t0.00\t86.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tiron/manganese ABC transporter ATP-binding protein YfeB\n+yfeD\tVIRULENCE\tyfeD\t-\t0.00\t0.00\t88.00\t90.00\t90.00\t92.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tiron/manganese ABC transporter permease subunit YfeD\n+zbmA\tble\tzbmA\tNF000479.1\t280.00\t280.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tBLEOMYCIN\tZORBAMYCIN\tzorbamycin binding protein ZbmA\n'
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/taxgroup.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/amrfinderplus-db/2021-09-30.1/taxgroup.tab Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup gpipe_taxgroup number_of_nucl_ref_genes
+Acinetobacter_baumannii Acinetobacter 0
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/amrfinderplus-db/2021-09-30.1/version.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/amrfinderplus-db/2021-09-30.1/version.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,1 @@
+2021-09-30.1
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/antifam.h3f
b
Binary file test-data/test-db/antifam.h3f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/antifam.h3i
b
Binary file test-data/test-db/antifam.h3i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/antifam.h3m
b
Binary file test-data/test-db/antifam.h3m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/antifam.h3p
b
Binary file test-data/test-db/antifam.h3p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/bakta.db
b
Binary file test-data/test-db/bakta.db has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/expert-protein-sequences.dmnd
b
Binary file test-data/test-db/expert-protein-sequences.dmnd has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-genes.i1f
b
Binary file test-data/test-db/ncRNA-genes.i1f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-genes.i1i
b
Binary file test-data/test-db/ncRNA-genes.i1i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-genes.i1m
b
Binary file test-data/test-db/ncRNA-genes.i1m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-genes.i1p
b
Binary file test-data/test-db/ncRNA-genes.i1p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-regions.i1f
b
Binary file test-data/test-db/ncRNA-regions.i1f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-regions.i1i
b
Binary file test-data/test-db/ncRNA-regions.i1i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-regions.i1m
b
Binary file test-data/test-db/ncRNA-regions.i1m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/ncRNA-regions.i1p
b
Binary file test-data/test-db/ncRNA-regions.i1p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/oric.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/oric.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>ORI10010001
+TATTCTTCTATAACATTGTCAAGAATGATAGTTAAAATTCTCGAAATTGGGATATTAACTGCTTTGGAGTAATTTCTAACTTTTTGTCATACTCTTTGACTTGTATAGAAGTGTACACCTGTATCTAGTTTTTCTTGGCGTTCAACAGGAACTATTCCTGGTATTTTTGTTTTAGGTTGGGGAGGAATAGGCTGTGGTTGTGTGAATTGTTGTTGAAAATTTTGATTTTTTTGCTGTAAGAAACCATTATTATGATATTGAAAATTTTGTTCCTCTTGAAAATATCTCTCTTTTTTTGGTTTTCCAGAAAAATTTGATGAAAAAGATTTTTCTTCATTTCAATTTTCAAGATTATTTTCATTTTGTTGATTTATTTGCTCAGGCTGTTGAAATGAATTATTTTTTGATCAAAAAGATTTTGGAAAGGTTTTTTCAAAAGCAGATAAAGGTCCAAAATCAAATGAAGATGAATCTTTGTCAAAAGATGTTTCTTCTCTTTTTGACAAATTTTGTTTTTGATTAAACTTATTTTTATTTTGGGGTGTTACTTTTTCTTTTATGGAAAACAAATCTTCTTCTAAAAGACTTTGTTCTGGGTCATCATCTTGTGCTAAATCAAAGAAAAAACGTTTCTTTTTGTTA
+>ORI10010003
+GGCGTAGACACTGAATTCGATGGGGATAAGTGGTGGATAAAAGAATATAAATTAGTCATTACACTTTACTCACGAATATCCCCCTTTTTTTAGAGAAAAAATATACTTTCTTCACAAGCTTGTGTGCGGTTTTTGTTTGGTAATTCTCGAGACATAAGCACTTATCCAGATATTCACAGTTACTATTATGTGATACGACTACATTCTTTATACTTATAAGATTAATAAGGAGGAAACTAACT
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/orit.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/orit.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>CP019995|MOBP
+GTAGAATCGTTTAGTATGAGAATAGAAAACCAACGGTTTTCATGAACTTACTAAACGATTCTAC
+>CP012386|MOBP
+AGAACAATCAACAACTAATTAGGCAAATTAAGGGGTGCTAAACAACTGCTAGTAGGTGCTAGAGATGTGCTATAAAGGGTGCTAGTTTGGTGCTAGTTACTGCTAAATACGTGCTAGTTTAGGTGCTAGAAACGTGCTATATGGTGCTAAAAAGGTGCTAGTTTGCATGAAGTTACCTGCTAGCCAAGTGCTAGTGGCGTTCGTTTTTGGGTCCCACGGGAAAGCCTTGCACTGCAAGGCGGGTCAGCTTGTCTGACCCCCATTTCCCCTTATGCTCTTCCGAAACACAAAGCGCAATTAAGCGAATACTAGAGAATAAATA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/pfam.h3f
b
Binary file test-data/test-db/pfam.h3f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/pfam.h3i
b
Binary file test-data/test-db/pfam.h3i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/pfam.h3m
b
Binary file test-data/test-db/pfam.h3m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/pfam.h3p
b
Binary file test-data/test-db/pfam.h3p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/psc.dmnd
b
Binary file test-data/test-db/psc.dmnd has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/rRNA.i1f
b
Binary file test-data/test-db/rRNA.i1f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/rRNA.i1i
b
Binary file test-data/test-db/rRNA.i1i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/rRNA.i1m
b
Binary file test-data/test-db/rRNA.i1m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/rRNA.i1p
b
Binary file test-data/test-db/rRNA.i1p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/rfam-go.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/rfam-go.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,1 @@
+Rfam:RF00001 GO:0003735
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/sorf.dmnd
b
Binary file test-data/test-db/sorf.dmnd has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test-db/version.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/version.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,53 @@
+{
+  "date": "2021-08-9",
+  "major": 3,
+  "minor": 0,
+  "dependencies": [
+    {
+      "name": "AMRFinderPlus",
+      "release": "2020-09-22.2"
+    },
+    {
+      "name": "COG",
+      "release": "2014"
+    },
+    {
+      "name": "DoriC",
+      "release": "10"
+    },
+    {
+      "name": "ISFinder",
+      "release": "2019-09-25"
+    },
+    {
+      "name": "Mob-suite",
+      "release": "2.0"
+    },
+    {
+      "name": "Pfam",
+      "release": "33.1"
+    },
+    {
+      "name": "RefSeq",
+      "release": "r202"
+    },
+    {
+      "name": "Rfam",
+      "release": "14.2"
+    },
+    {
+      "name": "UniProtKB/Swiss-Prot",
+      "release": "2020_04"
+    }
+  ],
+  "experts": [
+    {
+      "name": "AMRFinderPlus",
+      "release": "3.10.1"
+    },
+    {
+      "name": "NCBI BlastRules",
+      "release": "4.0"
+    }
+  ]
+}
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/test_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_database.loc Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+# Tab separated with 4 columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - name (Galaxy shows this in the UI)
+# - path (folder name containing the NCBI DB)
+test-db-bakta "Database test" ${__HERE__}/test-db
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/NC_002127.1.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/NC_002127.1.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,20 @@
+>NC_002127.1 Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTT
+TCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCA
+TCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGT
+CTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGC
+TTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTA
+TTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAG
+GAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTA
+AGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAA
+ACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTAT
+CACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTA
+TGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGT
+TCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGA
+ATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTT
+TAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTA
+ACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATT
+AAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,87 @@
+ID   contig_1; ; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   contig_1;
+XX
+DE   plasmid unnamed1, complete sequence
+XX
+OS   .
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 12:57:48
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     2
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /plasmid="unnamed1"
+FT   gene            413..736
+FT                   /locus_tag="IHHALP_00005"
+FT   CDS             413..736
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00005"
+FT                   /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+FT                   AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+FT                   MAD"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00005"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+FT   gene            complement(join(971..1330,1..141))
+FT                   /locus_tag="IHHALP_00010"
+FT   CDS             complement(join(971..1330,1..141))
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00010"
+FT                   /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+FT                   EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+FT                   YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+FT                   IPI"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00010"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.ffn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.ffn Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG
+>IHHALP_00010 hypothetical protein
+ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.fna Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,24 @@
+>contig_1 [completeness=complete] [topology=circular] [gcode=11]
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,83 @@
+LOCUS       contig_1                1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  plasmid unnamed1, complete sequence.
+ACCESSION   contig_1
+VERSION     contig_1
+KEYWORDS    .
+SOURCE      None
+  ORGANISM  .
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 12:57:48
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     2
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /plasmid="unnamed1"
+     gene            413..736
+                     /locus_tag="IHHALP_00005"
+     CDS             413..736
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00005"
+                     /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+                     AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+                     MAD"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00005"
+                     /inference="ab initio prediction:Prodigal:2.6"
+     gene            complement(join(971..1330,1..141))
+                     /locus_tag="IHHALP_00010"
+     CDS             complement(join(971..1330,1..141))
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00010"
+                     /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+                     EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+                     YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+                     IPI"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00010"
+                     /inference="ab initio prediction:Prodigal:2.6"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,36 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region contig_1 1 1330
+contig_1 Bakta region 1 1330 . + . ID=contig_1;Name=contig_1;Is_circular=true
+contig_1 Prodigal CDS 413 736 . + 0 ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein
+contig_1 Prodigal CDS 971 1471 . - 0 ID=IHHALP_00010;Name=hypothetical protein;locus_tag=IHHALP_00010;product=hypothetical protein
+##FASTA
+>contig_1
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.hypotheticals.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.hypotheticals.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.hypotheticals.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.hypotheticals.tsv Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,5 @@
+#Annotated with Bakta v1.4.2, https://github.com/oschwengers/bakta
+#Database v3.0, https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
+contig_1 413 736 + IHHALP_00005 12.1 10.4
+contig_1 971 141 - IHHALP_00010 18.9 7.7
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,90 @@
+{
+    "genome": {
+        "genus": null,
+        "species": null,
+        "strain": null,
+        "complete": true,
+        "gram": "?",
+        "translation_table": 11
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.6203007518796992
+    },
+    "features": [
+        {
+            "type": "cds",
+            "contig": "contig_1",
+            "start": 413,
+            "stop": 736,
+            "strand": "+",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 2,
+            "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
+            "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
+            "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 12072.90819999999,
+                "isoelectric_point": 10.367886161804197
+            },
+            "id": "IHHALPPJCH_1",
+            "locus": "IHHALP_00005"
+        },
+        {
+            "type": "cds",
+            "contig": "contig_1",
+            "start": 971,
+            "stop": 141,
+            "strand": "-",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "AGGA/GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 1,
+            "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
+            "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
+            "edge": true,
+            "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 18866.325799999995,
+                "isoelectric_point": 7.696590614318848
+            },
+            "id": "IHHALPPJCH_2",
+            "locus": "IHHALP_00010"
+        }
+    ],
+    "sequences": [
+        {
+            "id": "contig_1",
+            "description": "[completeness=complete] [topology=circular] [gcode=11]",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "simple_id": "contig_1",
+            "orig_id": "NC_002127.1",
+            "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "name": "unnamed1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 12:57:47",
+        "end": "2022-08-22 12:57:48"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,90 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+predict tRNAs...
+ found: 0
+predict tmRNAs...
+ found: 0
+predict rRNAs...
+ found: 0
+predict ncRNAs...
+ found: 0
+predict ncRNA regions...
+ found: 0
+predict CRISPR arrays...
+ found: 0
+predict & annotate CDSs...
+ predicted: 2 
+ discarded spurious: 0
+ revised translational exceptions: 0
+ detected IPSs: 0
+ found PSCs: 0
+ found PSCCs: 0
+ lookup annotations...
+ conduct expert systems...
+ amrfinder: 0
+ protein sequences: 0
+ combine annotations and mark hypotheticals...
+ analyze hypothetical proteins: 2
+ detected Pfam hits: 0 
+ calculated proteins statistics
+ revise special cases...
+extract sORF...
+ potential: 22
+ discarded due to overlaps: 2
+ discarded spurious: 0
+ detected IPSs: 0
+ found PSCs: 0
+ lookup annotations...
+ filter and combine annotations...
+ filtered sORFs: 0
+detect gaps...
+ found: 0
+detect oriCs/oriVs...
+ found: 0
+detect oriTs...
+ found: 0
+apply feature overlap filters...
+select features and create locus tags...
+selected: 2
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 62.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 2
+   hypotheticals: 2
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpqcic3cc5/job_working_directory/000/2/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ hypothetical TSV...
+ translated hypothetical CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:01 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
+contig_1 cds 413 736 + IHHALP_00005 hypothetical protein
+contig_1 cds 971 141 - IHHALP_00010 hypothetical protein
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_1/TEST_1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_1/TEST_1.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 62.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 2
+hypotheticals: 2
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,89 @@
+ID   NC_002127; SV 1; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   NC_002127;
+XX
+DE   Escherichia coli o157:h7 Sakai plasmid pOSAK1, complete sequence
+XX
+OS   Escherichia coli o157:h7 Sakai
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 12:58:03
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     2
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /organism="Escherichia coli o157:h7 Sakai"
+FT                   /strain="Sakai"
+FT                   /plasmid="pOSAK1"
+FT   gene            413..736
+FT                   /locus_tag="IHHALP_00005"
+FT   CDS             413..736
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00005"
+FT                   /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+FT                   AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+FT                   MAD"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00005"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+FT   gene            complement(join(971..1330,1..141))
+FT                   /locus_tag="IHHALP_00010"
+FT   CDS             complement(join(971..1330,1..141))
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00010"
+FT                   /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+FT                   EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+FT                   YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+FT                   IPI"
+FT                   /codon_start=1
+FT                   /transl_table=11
+FT                   /protein_id="gnl|Bakta|IHHALP_00010"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.ffn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.ffn Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG
+>IHHALP_00010 hypothetical protein
+ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,24 @@
+>NC_002127.1 Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,85 @@
+LOCUS       NC_002127.1             1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  Escherichia coli o157:h7 Sakai plasmid pOSAK1, complete sequence.
+ACCESSION   NC_002127
+VERSION     NC_002127.1
+KEYWORDS    .
+SOURCE      Escherichia coli o157:h7 Sakai
+  ORGANISM  Escherichia coli o157:h7 Sakai
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 12:58:03
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     2
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /organism="Escherichia coli o157:h7 Sakai"
+                     /strain="Sakai"
+                     /plasmid="pOSAK1"
+     gene            413..736
+                     /locus_tag="IHHALP_00005"
+     CDS             413..736
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00005"
+                     /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+                     AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+                     MAD"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00005"
+                     /inference="ab initio prediction:Prodigal:2.6"
+     gene            complement(join(971..1330,1..141))
+                     /locus_tag="IHHALP_00010"
+     CDS             complement(join(971..1330,1..141))
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00010"
+                     /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+                     EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+                     YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+                     IPI"
+                     /codon_start=1
+                     /transl_table=11
+                     /protein_id="gnl|Bakta|IHHALP_00010"
+                     /inference="ab initio prediction:Prodigal:2.6"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,37 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# organism Escherichia coli o157:h7 Sakai
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region NC_002127.1 1 1330
+NC_002127.1 Bakta region 1 1330 . + . ID=NC_002127.1;Name=NC_002127.1;Is_circular=true
+NC_002127.1 Prodigal CDS 413 736 . + 0 ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein
+NC_002127.1 Prodigal CDS 971 1471 . - 0 ID=IHHALP_00010;Name=hypothetical protein;locus_tag=IHHALP_00010;product=hypothetical protein
+##FASTA
+>NC_002127.1
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.hypotheticals.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.hypotheticals.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.hypotheticals.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.hypotheticals.tsv Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,5 @@
+#Annotated with Bakta v1.4.2, https://github.com/oschwengers/bakta
+#Database v3.0, https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
+NC_002127.1 413 736 + IHHALP_00005 12.1 10.4
+NC_002127.1 971 141 - IHHALP_00010 18.9 7.7
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,89 @@
+{
+    "genome": {
+        "genus": "Escherichia",
+        "species": "coli o157:h7",
+        "strain": "Sakai",
+        "plasmid": "pOSAK1",
+        "complete": true,
+        "gram": "?",
+        "translation_table": 11
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.6203007518796992
+    },
+    "features": [
+        {
+            "type": "cds",
+            "contig": "NC_002127.1",
+            "start": 413,
+            "stop": 736,
+            "strand": "+",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 2,
+            "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
+            "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
+            "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 12072.90819999999,
+                "isoelectric_point": 10.367886161804197
+            },
+            "id": "IHHALPPJCH_1",
+            "locus": "IHHALP_00005"
+        },
+        {
+            "type": "cds",
+            "contig": "NC_002127.1",
+            "start": 971,
+            "stop": 141,
+            "strand": "-",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "AGGA/GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 1,
+            "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
+            "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
+            "edge": true,
+            "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 18866.325799999995,
+                "isoelectric_point": 7.696590614318848
+            },
+            "id": "IHHALPPJCH_2",
+            "locus": "IHHALP_00010"
+        }
+    ],
+    "sequences": [
+        {
+            "id": "NC_002127.1",
+            "description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "simple_id": "contig_1",
+            "name": "pOSAK1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 12:58:02",
+        "end": "2022-08-22 12:58:03"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,88 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+skip tRNA prediction...
+skip tmRNA prediction...
+predict rRNAs...
+ found: 0
+predict ncRNAs...
+ found: 0
+predict ncRNA regions...
+ found: 0
+predict CRISPR arrays...
+ found: 0
+predict & annotate CDSs...
+ predicted: 2 
+ discarded spurious: 0
+ revised translational exceptions: 0
+ detected IPSs: 0
+ found PSCs: 0
+ found PSCCs: 0
+ lookup annotations...
+ conduct expert systems...
+ amrfinder: 0
+ protein sequences: 0
+ combine annotations and mark hypotheticals...
+ analyze hypothetical proteins: 2
+ detected Pfam hits: 0 
+ calculated proteins statistics
+ revise special cases...
+extract sORF...
+ potential: 22
+ discarded due to overlaps: 2
+ discarded spurious: 0
+ detected IPSs: 0
+ found PSCs: 0
+ lookup annotations...
+ filter and combine annotations...
+ filtered sORFs: 0
+detect gaps...
+ found: 0
+detect oriCs/oriVs...
+ found: 0
+detect oriTs...
+ found: 0
+apply feature overlap filters...
+select features and create locus tags...
+selected: 2
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 62.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 2
+   hypotheticals: 2
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpqcic3cc5/job_working_directory/000/4/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ hypothetical TSV...
+ translated hypothetical CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:00 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
+NC_002127.1 cds 413 736 + IHHALP_00005 hypothetical protein
+NC_002127.1 cds 971 141 - IHHALP_00010 hypothetical protein
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_2/TEST_2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_2/TEST_2.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 62.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 2
+hypotheticals: 2
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,62 @@
+ID   contig_1; ; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   contig_1;
+XX
+DE   plasmid unnamed1, complete sequence
+XX
+OS   .
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 12:58:17
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     0
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /plasmid="unnamed1"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.fna Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,24 @@
+>contig_1 [completeness=complete] [topology=circular] [gcode=11]
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,58 @@
+LOCUS       contig_1                1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  plasmid unnamed1, complete sequence.
+ACCESSION   contig_1
+VERSION     contig_1
+KEYWORDS    .
+SOURCE      None
+  ORGANISM  .
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 12:58:17
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     0
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /plasmid="unnamed1"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,34 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region contig_1 1 1330
+contig_1 Bakta region 1 1330 . + . ID=contig_1;Name=contig_1;Is_circular=true
+##FASTA
+>contig_1
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,42 @@
+{
+    "genome": {
+        "genus": null,
+        "species": null,
+        "strain": null,
+        "complete": true,
+        "gram": "?",
+        "translation_table": 11
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.0
+    },
+    "features": [],
+    "sequences": [
+        {
+            "id": "contig_1",
+            "description": "[completeness=complete] [topology=circular] [gcode=11]",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "simple_id": "contig_1",
+            "orig_id": "NC_002127.1",
+            "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "name": "unnamed1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 12:58:17",
+        "end": "2022-08-22 12:58:17"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,55 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+skip tRNA prediction...
+skip tmRNA prediction...
+skip rRNA prediction...
+skip ncRNA prediction...
+skip ncRNA region prediction...
+skip CRISPR array prediction...
+skip CDS prediction...
+skip sORF prediction...
+skip gap annotation...
+skip oriC/T annotation...
+apply feature overlap filters...
+select features and create locus tags...
+selected: 0
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 0.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 0
+   hypotheticals: 0
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpqcic3cc5/job_working_directory/000/6/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:00 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,3 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_3/TEST_3.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_3/TEST_3.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 0.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 0
+hypotheticals: 0
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.embl Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,87 @@
+ID   p2; ; circular; DNA; ; PRO; 1330 BP.
+XX
+AC   p2;
+XX
+DE   plasmid pOSAK1, complete sequence
+XX
+OS   .
+OC   .
+XX
+CC   Annotated with Bakta
+CC   Software: v1.4.2
+CC   Database: v3.0
+CC   DOI: 10.1099/mgen.0.000685
+CC   URL: github.com/oschwengers/bakta
+CC   
+CC   ##Genome Annotation Summary:##
+CC   Annotation Date                :: 08/22/2022, 12:58:56
+CC   Annotation Pipeline            :: Bakta
+CC   Annotation Software version    ::  v1.4.2
+CC   Annotation Database version    ::  v3.0
+CC   CDSs                           ::     2
+CC   tRNAs                          ::     0
+CC   tmRNAs                         ::     0
+CC   rRNAs                          ::     0
+CC   ncRNAs                         ::     0
+CC   regulatory ncRNAs              ::     0
+CC   CRISPR Arrays                  ::     0
+CC   oriCs/oriVs                    ::     0
+CC   oriTs                          ::     0
+CC   gaps                           ::     0
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   source          1..1330
+FT                   /mol_type="genomic DNA"
+FT                   /plasmid="pOSAK1"
+FT   gene            413..736
+FT                   /locus_tag="IHHALP_00005"
+FT   CDS             413..736
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00005"
+FT                   /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+FT                   AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+FT                   MAD"
+FT                   /codon_start=1
+FT                   /transl_table=4
+FT                   /protein_id="gnl|Bakta|IHHALP_00005"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+FT   gene            complement(join(971..1330,1..141))
+FT                   /locus_tag="IHHALP_00010"
+FT   CDS             complement(join(971..1330,1..141))
+FT                   /product="hypothetical protein"
+FT                   /locus_tag="IHHALP_00010"
+FT                   /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+FT                   EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+FT                   YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+FT                   IPI"
+FT                   /codon_start=1
+FT                   /transl_table=4
+FT                   /protein_id="gnl|Bakta|IHHALP_00010"
+FT                   /inference="ab initio prediction:Prodigal:2.6"
+XX
+SQ   Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+     ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc        60
+     gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc       120
+     agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg       180
+     tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt       240
+     tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt       300
+     gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac       360
+     cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa       420
+     acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga       480
+     agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt       540
+     acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga       600
+     agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga       660
+     cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag       720
+     gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt       780
+     aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga       840
+     tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc       900
+     agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc       960
+     tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa      1020
+     ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat      1080
+     cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat      1140
+     taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa      1200
+     aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc      1260
+     tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg      1320
+     cttctatttg                                                             1330
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.ffn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.ffn Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG
+>IHHALP_00010 hypothetical protein
+ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.fna Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,24 @@
+>p2 [completeness=complete] [topology=circular] [gcode=4] [plasmid-name=pOSAK1]
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.gbff Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,83 @@
+LOCUS       p2                      1330 bp    DNA     circular BCT 22-AUG-2022
+DEFINITION  plasmid pOSAK1, complete sequence.
+ACCESSION   p2
+VERSION     p2
+KEYWORDS    .
+SOURCE      None
+  ORGANISM  .
+            .
+COMMENT     Annotated with Bakta
+            Software: v1.4.2
+            Database: v3.0
+            DOI: 10.1099/mgen.0.000685
+            URL: github.com/oschwengers/bakta
+            
+            ##Genome Annotation Summary:##
+            Annotation Date                :: 08/22/2022, 12:58:56
+            Annotation Pipeline            :: Bakta
+            Annotation Software version    ::  v1.4.2
+            Annotation Database version    ::  v3.0
+            CDSs                           ::     2
+            tRNAs                          ::     0
+            tmRNAs                         ::     0
+            rRNAs                          ::     0
+            ncRNAs                         ::     0
+            regulatory ncRNAs              ::     0
+            CRISPR Arrays                  ::     0
+            oriCs/oriVs                    ::     0
+            oriTs                          ::     0
+            gaps                           ::     0
+FEATURES             Location/Qualifiers
+     source          1..1330
+                     /mol_type="genomic DNA"
+                     /plasmid="pOSAK1"
+     gene            413..736
+                     /locus_tag="IHHALP_00005"
+     CDS             413..736
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00005"
+                     /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+                     AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+                     MAD"
+                     /codon_start=1
+                     /transl_table=4
+                     /protein_id="gnl|Bakta|IHHALP_00005"
+                     /inference="ab initio prediction:Prodigal:2.6"
+     gene            complement(join(971..1330,1..141))
+                     /locus_tag="IHHALP_00010"
+     CDS             complement(join(971..1330,1..141))
+                     /product="hypothetical protein"
+                     /locus_tag="IHHALP_00010"
+                     /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+                     EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+                     YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+                     IPI"
+                     /codon_start=1
+                     /transl_table=4
+                     /protein_id="gnl|Bakta|IHHALP_00010"
+                     /inference="ab initio prediction:Prodigal:2.6"
+ORIGIN
+        1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+       61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+      121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+      181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+      241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+      301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+      361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+      421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+      481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+      541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+      601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+      661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+      721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+      781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+      841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+      901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+      961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+     1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+     1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+     1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+     1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+     1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+     1321 cttctatttg
+//
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.gff3 Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,13 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# Annotated with Bakta
+# Software: v1.4.2
+# Database: v3.0
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region p2 1 1330
+p2 Bakta region 1 1330 . + . ID=p2;Name=p2;Is_circular=true
+p2 Prodigal gene 413 736 . + . ID=IHHALP_00005_gene;locus_tag=IHHALP_00005
+p2 Prodigal CDS 413 736 . + 0 ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein;Parent=IHHALP_00005_gene;inference=ab initio prediction:Prodigal:2.6
+p2 Prodigal gene 971 1471 . - . ID=IHHALP_00010_gene;locus_tag=IHHALP_00010
+p2 Prodigal CDS 971 1471 . - 0 ID=IHHALP_00010;Name=hypothetical protein;locus_tag=IHHALP_00010;product=hypothetical protein;Parent=IHHALP_00010_gene;inference=ab initio prediction:Prodigal:2.6
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.hypotheticals.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.hypotheticals.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.hypotheticals.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.hypotheticals.tsv Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,5 @@
+#Annotated with Bakta v1.4.2, https://github.com/oschwengers/bakta
+#Database v3.0, https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
+p2 413 736 + IHHALP_00005 12.1 10.4
+p2 971 141 - IHHALP_00010 18.9 7.7
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,89 @@
+{
+    "genome": {
+        "genus": null,
+        "species": null,
+        "strain": null,
+        "complete": true,
+        "gram": "?",
+        "translation_table": 4
+    },
+    "stats": {
+        "no_sequences": 1,
+        "size": 1330,
+        "gc": 0.4518796992481203,
+        "n_ratio": 0.0,
+        "n50": 1330,
+        "coding_ratio": 0.6203007518796992
+    },
+    "features": [
+        {
+            "type": "cds",
+            "contig": "p2",
+            "start": 413,
+            "stop": 736,
+            "strand": "+",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 2,
+            "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
+            "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
+            "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 12072.90819999999,
+                "isoelectric_point": 10.367886161804197
+            },
+            "id": "IHHALPPJCH_1",
+            "locus": "IHHALP_00005"
+        },
+        {
+            "type": "cds",
+            "contig": "p2",
+            "start": 971,
+            "stop": 141,
+            "strand": "-",
+            "gene": null,
+            "product": "hypothetical protein",
+            "start_type": "ATG",
+            "rbs_motif": "AGGA/GGAG/GAGG",
+            "db_xrefs": [],
+            "frame": 1,
+            "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
+            "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
+            "edge": true,
+            "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
+            "hypothetical": true,
+            "seq_stats": {
+                "molecular_weight": 18866.325799999995,
+                "isoelectric_point": 7.696590614318848
+            },
+            "id": "IHHALPPJCH_2",
+            "locus": "IHHALP_00010"
+        }
+    ],
+    "sequences": [
+        {
+            "id": "p2",
+            "description": "[completeness=complete] [topology=circular] [gcode=4] [plasmid-name=pOSAK1]",
+            "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+            "length": 1330,
+            "complete": true,
+            "type": "plasmid",
+            "topology": "circular",
+            "orig_id": "NC_002127.1",
+            "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+            "name": "pOSAK1"
+        }
+    ],
+    "run": {
+        "start": "2022-08-22 12:58:54",
+        "end": "2022-08-22 12:58:56"
+    },
+    "version": {
+        "bakta": "1.4.2",
+        "db": "3.0"
+    }
+}
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,91 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+predict tRNAs...
+ found: 0
+predict tmRNAs...
+ found: 0
+predict rRNAs...
+ found: 0
+predict ncRNAs...
+ found: 0
+predict ncRNA regions...
+ found: 0
+predict CRISPR arrays...
+ found: 0
+predict & annotate CDSs...
+ predicted: 2 
+ discarded spurious: 0
+ revised translational exceptions: 0
+ detected IPSs: 0
+ found PSCs: 0
+ found PSCCs: 0
+ lookup annotations...
+ conduct expert systems...
+ amrfinder: 0
+ protein sequences: 0
+ user protein sequences: 0
+ combine annotations and mark hypotheticals...
+ analyze hypothetical proteins: 2
+ detected Pfam hits: 0 
+ calculated proteins statistics
+ revise special cases...
+extract sORF...
+ potential: 16
+ discarded due to overlaps: 2
+ discarded spurious: 0
+ detected IPSs: 0
+ found PSCs: 0
+ lookup annotations...
+ filter and combine annotations...
+ filtered sORFs: 0
+detect gaps...
+ found: 0
+detect oriCs/oriVs...
+ found: 0
+detect oriTs...
+ found: 0
+apply feature overlap filters...
+select features and create locus tags...
+selected: 2
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 62.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 2
+   hypotheticals: 2
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpqcic3cc5/job_working_directory/000/12/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ hypothetical TSV...
+ translated hypothetical CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:01 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+#Annotated with Bakta (v1.4.2): https://github.com/oschwengers/bakta
+#Database (v3.0): https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
+p2 cds 413 736 + IHHALP_00005 hypothetical protein
+p2 cds 971 141 - IHHALP_00010 hypothetical protein
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_4/TEST_4.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_4/TEST_4.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 62.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 2
+hypotheticals: 2
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_5/TEST_5.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_5/TEST_5.log Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,55 @@
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+skip tRNA prediction...
+skip tmRNA prediction...
+skip rRNA prediction...
+skip ncRNA prediction...
+skip ncRNA region prediction...
+skip CRISPR array prediction...
+skip CDS prediction...
+skip sORF prediction...
+skip gap annotation...
+skip oriC/T annotation...
+apply feature overlap filters...
+select features and create locus tags...
+selected: 0
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 0.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 0
+   hypotheticals: 0
+   signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmpqcic3cc5/job_working_directory/000/14/working
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:00 [mm:ss].
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/TEST_5/TEST_5.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/TEST_5/TEST_5.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,29 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 0.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 0
+hypotheticals: 0
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.4.2
+Database: v3.0
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/prodigal.tf
b
Binary file test-data/tmp/prodigal.tf has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/replicons.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/replicons.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,3 @@
+NC_002695.2 c1 c c -
+NC_002128.1 p1 plasmid c pO157
+NC_002127.1 p2 p c pOSAK1
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3f
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3i
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3m
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3p
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR.LIB.h3p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-mutation.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-mutation.tab Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup accession_version mutation_position mutation_symbol class subclass mutated_protein_name
+Escherichia WP_000019358.1 12 soxS_A12S MULTIDRUG AMPICILLIN/CHLORAMPHENICOL/QUINOLONE/RIFAMPIN/TETRACYCLINE Escherichia_ampicillin/chloramphenicol/quinolone/rifampin/tetracycline_resistant_SoxS
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-suppress
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-suppress Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup protein_accession protein_gi
+Escherichia AAA21095.1 151858
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-susceptible.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt-susceptible.tab Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup gene_symbol accession_version resistance_cutoff class subclass resistance_protein_name
+Streptococcus_pneumoniae pbp1a WP_001040013.1            99.000000 BETA-LACTAM BETA-LACTAM Streptococcus_pneumoniae_beta-lactam_resistant_PBP1A
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pdb
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pdb has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.phr
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.phr has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pin
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pin has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.psq
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.psq has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.ptf
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.ptf has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pto
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMRProt.pto has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ndb
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ndb has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nhr
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nhr has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nin
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nin has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.not
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.not has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nsq
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nsq has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ntf
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.ntf has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nto
b
Binary file test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/AMR_CDS.nto has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/database_format_version.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/database_format_version.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,1 @@
+3.10.16
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/fam.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/fam.tab Thu Sep 01 17:28:43 2022 +0000
b
b'@@ -0,0 +1,1744 @@\n+#node_id\tparent_node_id\tgene_symbol\thmm_id\thmm_tc1\thmm_tc2\tblastrule_complete_ident\tblastrule_complete_wp_coverage\tblastrule_complete_br_coverage\tblastrule_partial_ident\tblastrule_partial_wp_coverage\tblastrule_partial_br_coverage\treportable\ttype\tsubtype\tclass\tsubclass\tfamily_name\n+ACID\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tACID\t\t\t\n+ALL\t\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\t\t\t\t\t\n+AME\tAMR\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\taminoglycoside modifying enzymes\n+AMR\tALL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\t\n+BIOCIDE\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tBIOCIDE\t\t\t\n+BcII\tbla-B1\tbla2\tNF033095.1\t500.00\t500.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tBETA-LACTAM\tCARBAPENEM\tBcII family subclass B1 metallo-beta-lactamase\n+CDF_efflux\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tCDF family cation efflux transporter\n+CMY2-MIR-ACT-EC\tbla-C\tampC\tNF012173.1\t680.00\t680.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tBETA-LACTAM\tBETA-LACTAM\tCMY2/MIR/ACT/EC family class C beta-lactamase\n+EFFLUX\tAMR\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tefflux\n+HARLDQ_not_B3\tbla-B3\t-\tNF000405.1\t350.00\t350.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tHARLDQ motif MBL-fold protein\n+HEAT\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tHEAT\t\t\t\n+HTH_5\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tArsR/SmtB family metalloregulatory transcriptional repressor\n+LHR_hdeD\tHEAT\thdeD-GI\t-\t0.00\t0.00\t90.00\t90.00\t90.00\t93.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance membrane protein HdeD-GI\n+LHR_hsp20A\tHEAT\thsp20\t-\t0.00\t0.00\t94.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\tsmall heat shock protein 
sHSP20\n+LHR_hsp20B\tHEAT\tshsP\t-\t0.00\t0.00\t93.00\t90.00\t90.00\t94.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\tsmall heat shock protein sHSP20-GI\n+LHR_kefB\tHEAT\tkefB-GI\t-\t0.00\t0.00\t86.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance system K+/H+ antiporter KefB-GI\n+LHR_psiE\tHEAT\tpsi-GI\t-\t0.00\t0.00\t88.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance protein PsiE-GI\n+LHR_trx\tHEAT\ttrxLHR\t-\t0.00\t0.00\t85.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance system thioredoxin Trx-GI\n+LHR_yfdX1\tHEAT\tyfdX1\t-\t0.00\t0.00\t88.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance protein YfdX1\n+LHR_yfdX2\tHEAT\tyfdX2\t-\t0.00\t0.00\t90.00\t90.00\t90.00\t90.00\t90.00\t25.00\t1\tSTRESS\tHEAT\t\t\theat resistance protein YfdX2\n+MATE_efflux\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug efflux MATE transporter\n+METAL-RND-IM\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tcation efflux RND transporter permease subunit\n+METAL\tSTRESS\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tMetal Resistance\n+MFS_efflux_CHL\tMFS_efflux\tcml\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tPHENICOL\tCHLORAMPHENICOL\tchloramphenicol efflux MFS transporter\n+MFS_efflux_qac\tBIOCIDE\tqac\tNF000089.1\t900.00\t900.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tSTRESS\tBIOCIDE\tQUATERNARY AMMONIUM\tQUATERNARY AMMONIUM\tQacA/B family quaternary ammonium compound efflux MFS transporter\n+MFS_efflux\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug efflux MFS transporter\n+MerP_Gneg\tmerP\tmerP\tTIGR02052.1\t92.55\t92.55\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t1\tSTRESS\tMETAL\tMERCURY\tMERCURY\tmercury resistance system periplasmic binding protein 
MerP\n+OM_sidero\tVIRULENCE_Ecoli\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tVIRULENCE\tVIRULENCE\t\t\tTonB-dependent siderophore receptor\n+P-type_ATPase\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tmetal-translocating P-type ATPase\n+PERI-SENSOR\tMETAL\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tSTRESS\tMETAL\t\t\tperiplasmic heavy metal sensor\n+RESPONSE_REG\tAMR\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tDNA-binding response regulator\n+RND-IM\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug efflux RND transporter permease subunit\n+RND-OM\tEFFLUX\t-\t-\t0\t0\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0\tAMR\tAMR\t\t\tmultidrug'..b'ferase Vat(A)\n+vat(B)\tvat\tvat(B)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(B)\n+vat(C)\tvat\tvat(C)\tNF000097.1\t425.00\t425.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(C)\n+vat(D)\tvat\tvat(D)\tNF000111.1\t400.00\t400.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(D)\n+vat(E)\tvat\tvat(E)\tNF000020.1\t450.00\t450.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(E)\n+vat(F)\tvat\tvat(F)\tNF000147.1\t400.00\t400.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(F)\n+vat(H)\tvat\tvat(H)\tNF000504.1\t475.00\t425.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase Vat(H)\n+vat(I)\tvat\tvatI\tNF033468.1\t415.00\t415.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin A O-acetyltransferase 
Vat(I)\n+vat\tAMR\tvat\tNF000311.1\t300.00\t300.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tVat family streptogramin A O-acetyltransferase\n+vga(A)\tvga\tvga(A)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(A)\n+vga(B)\tvga\tvga(B)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(B)\n+vga(C)\tvga\tvga(C)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(C)\n+vga(D)\tvga\tvga(D)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(D)\n+vga(E)\tvga\tvga(E)\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tABC-F type ribosomal protection protein Vga(E)\n+vga\tabc-f\tvga\tNF000170.1\t800.00\t800.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tLINCOSAMIDE\tLINCOSAMIDE\tVga family ABC-F type ribosomal protection protein\n+vgb(A)\tvgb\tvgb(A)\tNF000022.1\t600.00\t600.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase Vgb(A)\n+vgb(B)\tvgb\tvgb(B)\tNF000096.1\t600.00\t600.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase Vgb(B)\n+vgb(C)\tvgb\tvgbC\t-\t0.00\t0.00\t84.00\t90.00\t90.00\t88.00\t90.00\t25.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase Vgb(C)\n+vgb\tAMR\tvgb\t-\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tSTREPTOGRAMIN\tSTREPTOGRAMIN\tstreptogramin B lyase\n+virF\tVIRULENCE_Ecoli\tvirF\t-\t0.00\t0.00\t94.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tAraC family invasion system transcriptional regulator 
VirF\n+vmlR\tabc-f\tvmlR\t-\t0.00\t0.00\t90.00\t90.00\t90.00\t96.00\t90.00\t25.00\t2\tAMR\tAMR\tMACROLIDE/PLEUROMUTILIN\tLINCOSAMIDE/STREPTOGRAMIN/TIAMULIN\tABC-F type ribosomal protection protein VmlR\n+vph\tAMR\tvph\tNF000088.1\t400.00\t400.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tTUBERACTINOMYCIN\tVIOMYCIN\tviomycin phosphotransferase\n+ybtP\tVIRULENCE\tybtP\t-\t0.00\t0.00\t85.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tyersiniabactin ABC transporter ATP-binding/permease protein YbtP\n+ybtQ\tVIRULENCE\tybtQ\t-\t0.00\t0.00\t85.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tyersiniabactin ABC transporter ATP-binding/permease protein YbtQ\n+yfeA\tVIRULENCE\tyfeA\t-\t0.00\t0.00\t83.00\t90.00\t90.00\t88.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tiron/manganese ABC transporter substrate-binding protein YfeA\n+yfeB\tVIRULENCE\tyfeB\t-\t0.00\t0.00\t86.00\t90.00\t90.00\t96.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tiron/manganese ABC transporter ATP-binding protein YfeB\n+yfeD\tVIRULENCE\tyfeD\t-\t0.00\t0.00\t88.00\t90.00\t90.00\t92.00\t90.00\t25.00\t1\tVIRULENCE\tVIRULENCE\t\t\tiron/manganese ABC transporter permease subunit YfeD\n+zbmA\tble\tzbmA\tNF000479.1\t280.00\t280.00\t0.00\t0.00\t0.00\t0.00\t0.00\t0.00\t2\tAMR\tAMR\tBLEOMYCIN\tZORBAMYCIN\tzorbamycin binding protein ZbmA\n'
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/taxgroup.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/taxgroup.tab Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,2 @@
+#taxgroup gpipe_taxgroup number_of_nucl_ref_genes
+Acinetobacter_baumannii Acinetobacter 0
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/version.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/amrfinderplus-db/2021-09-30.1/version.txt Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,1 @@
+2021-09-30.1
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/antifam.h3f
b
Binary file test-data/tmp/test-db/antifam.h3f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/antifam.h3i
b
Binary file test-data/tmp/test-db/antifam.h3i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/antifam.h3m
b
Binary file test-data/tmp/test-db/antifam.h3m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/antifam.h3p
b
Binary file test-data/tmp/test-db/antifam.h3p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/bakta.db
b
Binary file test-data/tmp/test-db/bakta.db has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/expert-protein-sequences.dmnd
b
Binary file test-data/tmp/test-db/expert-protein-sequences.dmnd has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-genes.i1f
b
Binary file test-data/tmp/test-db/ncRNA-genes.i1f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-genes.i1i
b
Binary file test-data/tmp/test-db/ncRNA-genes.i1i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-genes.i1m
b
Binary file test-data/tmp/test-db/ncRNA-genes.i1m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-genes.i1p
b
Binary file test-data/tmp/test-db/ncRNA-genes.i1p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-regions.i1f
b
Binary file test-data/tmp/test-db/ncRNA-regions.i1f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-regions.i1i
b
Binary file test-data/tmp/test-db/ncRNA-regions.i1i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-regions.i1m
b
Binary file test-data/tmp/test-db/ncRNA-regions.i1m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/ncRNA-regions.i1p
b
Binary file test-data/tmp/test-db/ncRNA-regions.i1p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/oric.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/oric.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>ORI10010001
+TATTCTTCTATAACATTGTCAAGAATGATAGTTAAAATTCTCGAAATTGGGATATTAACTGCTTTGGAGTAATTTCTAACTTTTTGTCATACTCTTTGACTTGTATAGAAGTGTACACCTGTATCTAGTTTTTCTTGGCGTTCAACAGGAACTATTCCTGGTATTTTTGTTTTAGGTTGGGGAGGAATAGGCTGTGGTTGTGTGAATTGTTGTTGAAAATTTTGATTTTTTTGCTGTAAGAAACCATTATTATGATATTGAAAATTTTGTTCCTCTTGAAAATATCTCTCTTTTTTTGGTTTTCCAGAAAAATTTGATGAAAAAGATTTTTCTTCATTTCAATTTTCAAGATTATTTTCATTTTGTTGATTTATTTGCTCAGGCTGTTGAAATGAATTATTTTTTGATCAAAAAGATTTTGGAAAGGTTTTTTCAAAAGCAGATAAAGGTCCAAAATCAAATGAAGATGAATCTTTGTCAAAAGATGTTTCTTCTCTTTTTGACAAATTTTGTTTTTGATTAAACTTATTTTTATTTTGGGGTGTTACTTTTTCTTTTATGGAAAACAAATCTTCTTCTAAAAGACTTTGTTCTGGGTCATCATCTTGTGCTAAATCAAAGAAAAAACGTTTCTTTTTGTTA
+>ORI10010003
+GGCGTAGACACTGAATTCGATGGGGATAAGTGGTGGATAAAAGAATATAAATTAGTCATTACACTTTACTCACGAATATCCCCCTTTTTTTAGAGAAAAAATATACTTTCTTCACAAGCTTGTGTGCGGTTTTTGTTTGGTAATTCTCGAGACATAAGCACTTATCCAGATATTCACAGTTACTATTATGTGATACGACTACATTCTTTATACTTATAAGATTAATAAGGAGGAAACTAACT
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/orit.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/orit.fna Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>CP019995|MOBP
+GTAGAATCGTTTAGTATGAGAATAGAAAACCAACGGTTTTCATGAACTTACTAAACGATTCTAC
+>CP012386|MOBP
+AGAACAATCAACAACTAATTAGGCAAATTAAGGGGTGCTAAACAACTGCTAGTAGGTGCTAGAGATGTGCTATAAAGGGTGCTAGTTTGGTGCTAGTTACTGCTAAATACGTGCTAGTTTAGGTGCTAGAAACGTGCTATATGGTGCTAAAAAGGTGCTAGTTTGCATGAAGTTACCTGCTAGCCAAGTGCTAGTGGCGTTCGTTTTTGGGTCCCACGGGAAAGCCTTGCACTGCAAGGCGGGTCAGCTTGTCTGACCCCCATTTCCCCTTATGCTCTTCCGAAACACAAAGCGCAATTAAGCGAATACTAGAGAATAAATA
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/pfam.h3f
b
Binary file test-data/tmp/test-db/pfam.h3f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/pfam.h3i
b
Binary file test-data/tmp/test-db/pfam.h3i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/pfam.h3m
b
Binary file test-data/tmp/test-db/pfam.h3m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/pfam.h3p
b
Binary file test-data/tmp/test-db/pfam.h3p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/psc.dmnd
b
Binary file test-data/tmp/test-db/psc.dmnd has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/rRNA.i1f
b
Binary file test-data/tmp/test-db/rRNA.i1f has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/rRNA.i1i
b
Binary file test-data/tmp/test-db/rRNA.i1i has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/rRNA.i1m
b
Binary file test-data/tmp/test-db/rRNA.i1m has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/rRNA.i1p
b
Binary file test-data/tmp/test-db/rRNA.i1p has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/rfam-go.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/rfam-go.tsv Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,1 @@
+Rfam:RF00001 GO:0003735
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/sorf.dmnd
b
Binary file test-data/tmp/test-db/sorf.dmnd has changed
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test-db/version.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test-db/version.json Thu Sep 01 17:28:43 2022 +0000
[
@@ -0,0 +1,53 @@
+{
+  "date": "2021-08-9",
+  "major": 3,
+  "minor": 0,
+  "dependencies": [
+    {
+      "name": "AMRFinderPlus",
+      "release": "2020-09-22.2"
+    },
+    {
+      "name": "COG",
+      "release": "2014"
+    },
+    {
+      "name": "DoriC",
+      "release": "10"
+    },
+    {
+      "name": "ISFinder",
+      "release": "2019-09-25"
+    },
+    {
+      "name": "Mob-suite",
+      "release": "2.0"
+    },
+    {
+      "name": "Pfam",
+      "release": "33.1"
+    },
+    {
+      "name": "RefSeq",
+      "release": "r202"
+    },
+    {
+      "name": "Rfam",
+      "release": "14.2"
+    },
+    {
+      "name": "UniProtKB/Swiss-Prot",
+      "release": "2020_04"
+    }
+  ],
+  "experts": [
+    {
+      "name": "AMRFinderPlus",
+      "release": "3.10.1"
+    },
+    {
+      "name": "NCBI BlastRules",
+      "release": "4.0"
+    }
+  ]
+}
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/test_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/test_database.loc Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,5 @@
+# Tab separated with 3 columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - name (Galaxy shows this in the UI)
+# - path (folder name containing the Bakta DB)
+test-db-bakta "Database test" ${__HERE__}/test-db
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/tmp/user-proteins.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tmp/user-proteins.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>VFDB_test 90~~~90~~~90~~~yaxA~~~cytotoxin YaxA~~~VFDB:VFG045347,VFDB:VF0511,EC:1.1.1.1,EC:2.2.2.2
+MTQTQLAIDNVLASAENTIQLNELPKVVLDFITGEQTSVARSGGIFTKEDLINLKLYVRKGLSLPTRQDEVEAYLGYKKIDVAGLEPKDIKLLFDEIHNHALNWNDVEQAVLQQSLDLDIAAKNIISTGNEIINLINQMPITLRVKTLLRDITDKQLENITYESADHEVASALKDILDDMKGDINRHQTTTENVRKKVSDYRITLTGGELSSGDKVNGLEPQVKTKYDLMEKSNMRKSIKELDEKIKEKKQRIEQLKKDYDKFVGLSFTGAIGGIIAMAITSGIFGAKAENARKEKNALISEVAELESKVSSQRALQTALEALSLSFSDIGIRMVDAESALNHLDFMWLSVLNQITESQIQFAMINNALRLTSFVNKFQQVITPWQSVGDSARQLVDIFDEAIKEYKKVYG
+>hypo-mock-test 99~~~99~~~99~~~mock1~~~mock hypothetical user protein 1~~~USERDB:MOCK1,EC:0.0.0.0
+MAQNPFKALNINIDKIESALTQNGVTNYSSNVKNERETHISGTYKGIDFLIKLMPSGGNTTIGRASGQNNTYFDEIALIIKENCLYSDTKNFEYTIPKFSDDDRANLFEFLSEEGITITEDNNNDPNCKHQYIMTTSNGDRVRAKIYKRGSIQFQGKYLQIASLINDFMCSILNMKEIVEQKNKEFNVDIKKETIESELHSKLPKSIDKIHEDIKKQLSCSLIMKKIDVEMEDYSTYCFSALRAIEGFIYQILNDVCNPSSSKNLGEYFTENKPKYIIREIHQETINGEIAEVLCECYTYWHENRHGLFHMKPGIADTKTINKLESIAIIDTVCQLIDGGVARLKL
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf test-data/user-proteins.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/user-proteins.faa Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,4 @@
+>VFDB_test 90~~~90~~~90~~~yaxA~~~cytotoxin YaxA~~~VFDB:VFG045347,VFDB:VF0511,EC:1.1.1.1,EC:2.2.2.2
+MTQTQLAIDNVLASAENTIQLNELPKVVLDFITGEQTSVARSGGIFTKEDLINLKLYVRKGLSLPTRQDEVEAYLGYKKIDVAGLEPKDIKLLFDEIHNHALNWNDVEQAVLQQSLDLDIAAKNIISTGNEIINLINQMPITLRVKTLLRDITDKQLENITYESADHEVASALKDILDDMKGDINRHQTTTENVRKKVSDYRITLTGGELSSGDKVNGLEPQVKTKYDLMEKSNMRKSIKELDEKIKEKKQRIEQLKKDYDKFVGLSFTGAIGGIIAMAITSGIFGAKAENARKEKNALISEVAELESKVSSQRALQTALEALSLSFSDIGIRMVDAESALNHLDFMWLSVLNQITESQIQFAMINNALRLTSFVNKFQQVITPWQSVGDSARQLVDIFDEAIKEYKKVYG
+>hypo-mock-test 99~~~99~~~99~~~mock1~~~mock hypothetical user protein 1~~~USERDB:MOCK1,EC:0.0.0.0
+MAQNPFKALNINIDKIESALTQNGVTNYSSNVKNERETHISGTYKGIDFLIKLMPSGGNTTIGRASGQNNTYFDEIALIIKENCLYSDTKNFEYTIPKFSDDDRANLFEFLSEEGITITEDNNNDPNCKHQYIMTTSNGDRVRAKIYKRGSIQFQGKYLQIASLINDFMCSILNMKEIVEQKNKEFNVDIKKETIESELHSKLPKSIDKIHEDIKKQLSCSLIMKKIDVEMEDYSTYCFSALRAIEGFIYQILNDVCNPSSSKNLGEYFTENKPKYIIREIHQETINGEIAEVLCECYTYWHENRHGLFHMKPGIADTKTINKLESIAIIDTVCQLIDGGVARLKL
\ No newline at end of file
b
diff -r 000000000000 -r 1a27ad3d0cdf tool-data/bakta_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bakta_database.loc.sample Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,3 @@
+#This is a sample file distributed with Galaxy that enables tools to use a directory of Bakta databases.
+#The file has this format (white space characters are TAB characters): value, name, path
+test-db-bakta "Database test" ${__HERE__}/test-db
b
diff -r 000000000000 -r 1a27ad3d0cdf tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="bakta_database" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/bakta_database.loc.sample" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 1a27ad3d0cdf tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Sep 01 17:28:43 2022 +0000
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="bakta_database" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/test_database.loc" />
+    </table>
+</tables>