# HG changeset patch
# User iuc
# Date 1690197736 0
# Node ID 3f0aa1b3e816c60c705779b2663eaaf9cc7cdf82
# Parent 865ece5ca1788fa1d5d1f3df7ab6e7b082632897
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit e2c4ab5546b135ff903545073c3223aab166d8f5
diff -r 865ece5ca178 -r 3f0aa1b3e816 bakta.xml
--- a/bakta.xml Fri Feb 10 14:20:09 2023 +0000
+++ b/bakta.xml Mon Jul 24 11:22:16 2023 +0000
@@ -11,10 +11,12 @@
-
-
+
+
@@ -128,87 +131,89 @@
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
- output_files['output_selection'] and "file_tsv" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_tsv" in output_files['output_selection']
-
- output_files['output_selection'] and "file_gff3" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_gff3" in output_files['output_selection']
-
- output_files['output_selection'] and "file_gbff" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_gbff" in output_files['output_selection']
-
- output_files['output_selection'] and "file_embl" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_embl" in output_files['output_selection']
-
- output_files['output_selection'] and "file_fna" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_fna" in output_files['output_selection']
-
- output_files['output_selection'] and "file_ffn" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_ffn" in output_files['output_selection']
-
- output_files['output_selection'] and "file_faa" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_faa" in output_files['output_selection']
-
- output_files['output_selection'] and "hypo_tsv" in output_files['output_selection']
+
+ output_files['output_selection'] and "hypo_tsv" in output_files['output_selection']
-
- output_files['output_selection'] and "hypo_fa" in output_files['output_selection']
+
+ output_files['output_selection'] and "hypo_fa" in output_files['output_selection']
-
- output_files['output_selection'] and "sum_txt" in output_files['output_selection']
+
+ output_files['output_selection'] and "sum_txt" in output_files['output_selection']
-
- output_files['output_selection'] and "file_json" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_json" in output_files['output_selection']
-
- output_files['output_selection'] and "file_plot" in output_files['output_selection']
+
+ output_files['output_selection'] and "file_plot" in output_files['output_selection']
- output_files['output_selection'] and "log_txt" in output_files['output_selection']
+ output_files['output_selection'] and "log_txt" in output_files['output_selection']
-
+
@@ -226,17 +231,13 @@
-
+
-
+
-
+
@@ -264,34 +265,25 @@
-
+
-
@@ -306,15 +298,11 @@
-
+
@@ -323,14 +311,40 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
- 1.6.1
- 1.5
- 0
+ 1.8.1
+ 1.7
+ 1
21.05
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.embl
--- a/test-data/TEST_1/TEST_1.embl Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.embl Mon Jul 24 11:22:16 2023 +0000
@@ -8,16 +8,13 @@
OC .
XX
CC Annotated with Bakta
-CC Software: v1.5.0
-CC Database: v4.0
+CC Software: v1.8.1
+CC Database: v5.0, full
CC DOI: 10.1099/mgen.0.000685
CC URL: github.com/oschwengers/bakta
CC
CC ##Genome Annotation Summary:##
-CC Annotation Date :: 09/16/2022, 07:31:59
-CC Annotation Pipeline :: Bakta
-CC Annotation Software version :: v1.5.0
-CC Annotation Database version :: v4.0
+CC Annotation Date :: 06/19/2023, 09:30:06
CC CDSs :: 2
CC tRNAs :: 0
CC tmRNAs :: 0
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.fna
--- a/test-data/TEST_1/TEST_1.fna Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.fna Mon Jul 24 11:22:16 2023 +0000
@@ -1,4 +1,4 @@
->contig_1 [completeness=complete] [topology=circular] [gcode=11]
+>contig_1 [gcode=11] [completeness=complete] [topology=circular] [plasmid-name=unnamed1]
TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.gbff
--- a/test-data/TEST_1/TEST_1.gbff Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.gbff Mon Jul 24 11:22:16 2023 +0000
@@ -1,4 +1,4 @@
-LOCUS contig_1 1330 bp DNA circular BCT 16-SEP-2022
+LOCUS contig_1 1330 bp DNA circular BCT 19-JUN-2023
DEFINITION plasmid unnamed1, complete sequence.
ACCESSION contig_1
VERSION contig_1
@@ -7,16 +7,13 @@
ORGANISM .
.
COMMENT Annotated with Bakta
- Software: v1.5.0
- Database: v4.0
+ Software: v1.8.1
+ Database: v5.0, full
DOI: 10.1099/mgen.0.000685
URL: github.com/oschwengers/bakta
##Genome Annotation Summary:##
- Annotation Date :: 09/16/2022, 07:31:59
- Annotation Pipeline :: Bakta
- Annotation Software version :: v1.5.0
- Annotation Database version :: v4.0
+ Annotation Date :: 06/19/2023, 09:30:06
CDSs :: 2
tRNAs :: 0
tmRNAs :: 0
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.gff3
--- a/test-data/TEST_1/TEST_1.gff3 Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.gff3 Mon Jul 24 11:22:16 2023 +0000
@@ -1,8 +1,8 @@
##gff-version 3
##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
# Annotated with Bakta
-# Software: v1.5.0
-# Database: v4.0
+# Software: v1.8.1
+# Database: v5.0, full
# DOI: 10.1099/mgen.0.000685
# URL: github.com/oschwengers/bakta
##sequence-region contig_1 1 1330
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.hypotheticals.tsv
--- a/test-data/TEST_1/TEST_1.hypotheticals.tsv Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.hypotheticals.tsv Mon Jul 24 11:22:16 2023 +0000
@@ -1,5 +1,5 @@
-#Annotated with Bakta v1.5.0, https://github.com/oschwengers/bakta
-#Database v4.0, https://doi.org/10.5281/zenodo.4247252
+#Annotated with Bakta v1.8.1, https://github.com/oschwengers/bakta
+#Database v5.0, https://doi.org/10.5281/zenodo.4247252
#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
contig_1 413 736 + IHHALP_00005 12.1 10.4
contig_1 971 141 - IHHALP_00010 18.9 7.7
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.json
--- a/test-data/TEST_1/TEST_1.json Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.json Mon Jul 24 11:22:16 2023 +0000
@@ -32,6 +32,7 @@
"aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
"nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
"hypothetical": true,
+ "genes": [],
"seq_stats": {
"molecular_weight": 12072.90819999999,
"isoelectric_point": 10.367886161804197
@@ -56,6 +57,7 @@
"edge": true,
"nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
"hypothetical": true,
+ "genes": [],
"seq_stats": {
"molecular_weight": 18866.325799999995,
"isoelectric_point": 7.696590614318848
@@ -67,7 +69,7 @@
"sequences": [
{
"id": "contig_1",
- "description": "[completeness=complete] [topology=circular] [gcode=11]",
+ "description": "[gcode=11] [completeness=complete] [topology=circular] [plasmid-name=unnamed1]",
"sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
"length": 1330,
"complete": true,
@@ -80,11 +82,14 @@
}
],
"run": {
- "start": "2022-09-16 07:31:58",
- "end": "2022-09-16 07:31:59"
+ "start": "2023-06-19 09:30:05",
+ "end": "2023-06-19 09:30:06"
},
"version": {
- "bakta": "1.5.0",
- "db": "4.0"
+ "bakta": "1.8.1",
+ "db": {
+ "version": "5.0",
+ "type": "full"
+ }
}
}
\ No newline at end of file
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.log
--- a/test-data/TEST_1/TEST_1.log Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.log Mon Jul 24 11:22:16 2023 +0000
@@ -1,3 +1,13 @@
+Bakta v1.8.1
+Options and arguments:
+ input: /tmp/tmp6wdeee6u/files/3/2/d/dataset_32d4899b-3214-49f6-a57b-29ff0e04425d.dat
+ db: /tmp/tmp6wdeee6u/job_working_directory/000/2/working/database_path, version 5.0, full
+ output: /tmp/tmp6wdeee6u/job_working_directory/000/2/working/bakta_output
+ tmp directory: /tmp/tmp6wdeee6u/tmp/tmpm3wbu37o
+ prefix: bakta_output
+ threads: 1
+ translation table: 11
+
parse genome sequences...
imported: 1
filtered & revised: 1
@@ -53,6 +63,8 @@
apply feature overlap filters...
select features and create locus tags...
selected: 2
+improve annotations...
+ revised gene symbols: 0
genome statistics:
Genome size: 1,330 bp
@@ -70,15 +82,15 @@
ncRNA regions: 0
CRISPR arrays: 0
CDSs: 2
- hypotheticals: 2
- pseudogenes: 0
- signal peptides: 0
+ hypotheticals: 2
+ pseudogenes: 0
+ signal peptides: 0
sORFs: 0
gaps: 0
oriCs/oriVs: 0
oriTs: 0
-export annotation results to: /tmp/tmpmnqj1xog/job_working_directory/000/2/working
+export annotation results to: /tmp/tmp6wdeee6u/job_working_directory/000/2/working/bakta_output
human readable TSV...
GFF3...
INSDC GenBank & EMBL...
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.tsv
--- a/test-data/TEST_1/TEST_1.tsv Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.tsv Mon Jul 24 11:22:16 2023 +0000
@@ -1,5 +1,8 @@
-#Annotated with Bakta (v1.5.0): https://github.com/oschwengers/bakta
-#Database (v4.0): https://doi.org/10.5281/zenodo.4247252
+# Annotated with Bakta
+# Software: v1.8.1
+# Database: v5.0, full
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
contig_1 cds 413 736 + IHHALP_00005 hypothetical protein
contig_1 cds 971 141 - IHHALP_00010 hypothetical protein
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1.txt
--- a/test-data/TEST_1/TEST_1.txt Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_1/TEST_1.txt Mon Jul 24 11:22:16 2023 +0000
@@ -24,7 +24,7 @@
oriTs: 0
Bakta:
-Software: v1.5.0
-Database: v4.0
+Software: v1.8.1
+Database: v5.0, full
DOI: 10.1099/mgen.0.000685
URL: github.com/oschwengers/bakta
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_1/TEST_1_plot.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_1/TEST_1_plot.svg Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,2400 @@
+
+
+
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.embl
--- a/test-data/TEST_2/TEST_2.embl Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-ID NC_002127; SV 1; circular; DNA; ; PRO; 1330 BP.
-XX
-AC NC_002127;
-XX
-DE Escherichia coli o157:h7 Sakai plasmid pOSAK1, complete sequence
-XX
-OS Escherichia coli o157:h7 Sakai
-OC .
-XX
-CC Annotated with Bakta
-CC Software: v1.5.0
-CC Database: v4.0
-CC DOI: 10.1099/mgen.0.000685
-CC URL: github.com/oschwengers/bakta
-CC
-CC ##Genome Annotation Summary:##
-CC Annotation Date :: 09/16/2022, 07:32:10
-CC Annotation Pipeline :: Bakta
-CC Annotation Software version :: v1.5.0
-CC Annotation Database version :: v4.0
-CC CDSs :: 2
-CC tRNAs :: 0
-CC tmRNAs :: 0
-CC rRNAs :: 0
-CC ncRNAs :: 0
-CC regulatory ncRNAs :: 0
-CC CRISPR Arrays :: 0
-CC oriCs/oriVs :: 0
-CC oriTs :: 0
-CC gaps :: 0
-CC pseudogenes :: 0
-XX
-FH Key Location/Qualifiers
-FH
-FT source 1..1330
-FT /mol_type="genomic DNA"
-FT /organism="Escherichia coli o157:h7 Sakai"
-FT /strain="Sakai"
-FT /plasmid="pOSAK1"
-FT gene 413..736
-FT /locus_tag="IHHALP_00005"
-FT CDS 413..736
-FT /product="hypothetical protein"
-FT /locus_tag="IHHALP_00005"
-FT /protein_id="gnl|Bakta|IHHALP_00005"
-FT /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
-FT AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
-FT MAD"
-FT /codon_start=1
-FT /transl_table=11
-FT /inference="ab initio prediction:Prodigal:2.6"
-FT gene complement(join(971..1330,1..141))
-FT /locus_tag="IHHALP_00010"
-FT CDS complement(join(971..1330,1..141))
-FT /product="hypothetical protein"
-FT /locus_tag="IHHALP_00010"
-FT /protein_id="gnl|Bakta|IHHALP_00010"
-FT /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
-FT EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
-FT YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
-FT IPI"
-FT /codon_start=1
-FT /transl_table=11
-FT /inference="ab initio prediction:Prodigal:2.6"
-XX
-SQ Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
- ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc 60
- gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc 120
- agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg 180
- tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt 240
- tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt 300
- gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac 360
- cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa 420
- acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga 480
- agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt 540
- acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga 600
- agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga 660
- cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag 720
- gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt 780
- aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga 840
- tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc 900
- agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc 960
- tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa 1020
- ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat 1080
- cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat 1140
- taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa 1200
- aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc 1260
- tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg 1320
- cttctatttg 1330
-//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.gbff
--- a/test-data/TEST_2/TEST_2.gbff Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-LOCUS NC_002127.1 1330 bp DNA circular BCT 16-SEP-2022
-DEFINITION Escherichia coli o157:h7 Sakai plasmid pOSAK1, complete sequence.
-ACCESSION NC_002127
-VERSION NC_002127.1
-KEYWORDS .
-SOURCE Escherichia coli o157:h7 Sakai
- ORGANISM Escherichia coli o157:h7 Sakai
- .
-COMMENT Annotated with Bakta
- Software: v1.5.0
- Database: v4.0
- DOI: 10.1099/mgen.0.000685
- URL: github.com/oschwengers/bakta
-
- ##Genome Annotation Summary:##
- Annotation Date :: 09/16/2022, 07:32:10
- Annotation Pipeline :: Bakta
- Annotation Software version :: v1.5.0
- Annotation Database version :: v4.0
- CDSs :: 2
- tRNAs :: 0
- tmRNAs :: 0
- rRNAs :: 0
- ncRNAs :: 0
- regulatory ncRNAs :: 0
- CRISPR Arrays :: 0
- oriCs/oriVs :: 0
- oriTs :: 0
- gaps :: 0
- pseudogenes :: 0
-FEATURES Location/Qualifiers
- source 1..1330
- /mol_type="genomic DNA"
- /organism="Escherichia coli o157:h7 Sakai"
- /strain="Sakai"
- /plasmid="pOSAK1"
- gene 413..736
- /locus_tag="IHHALP_00005"
- CDS 413..736
- /product="hypothetical protein"
- /locus_tag="IHHALP_00005"
- /protein_id="gnl|Bakta|IHHALP_00005"
- /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
- AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
- MAD"
- /codon_start=1
- /transl_table=11
- /inference="ab initio prediction:Prodigal:2.6"
- gene complement(join(971..1330,1..141))
- /locus_tag="IHHALP_00010"
- CDS complement(join(971..1330,1..141))
- /product="hypothetical protein"
- /locus_tag="IHHALP_00010"
- /protein_id="gnl|Bakta|IHHALP_00010"
- /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
- EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
- YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
- IPI"
- /codon_start=1
- /transl_table=11
- /inference="ab initio prediction:Prodigal:2.6"
-ORIGIN
- 1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
- 61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
- 121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
- 181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
- 241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
- 301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
- 361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
- 421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
- 481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
- 541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
- 601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
- 661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
- 721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
- 781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
- 841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
- 901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
- 961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
- 1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
- 1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
- 1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
- 1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
- 1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
- 1321 cttctatttg
-//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.gff3
--- a/test-data/TEST_2/TEST_2.gff3 Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_2/TEST_2.gff3 Mon Jul 24 11:22:16 2023 +0000
@@ -2,8 +2,8 @@
##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
# organism Escherichia coli o157:h7 Sakai
# Annotated with Bakta
-# Software: v1.5.0
-# Database: v4.0
+# Software: v1.8.1
+# Database: v5.0, full
# DOI: 10.1099/mgen.0.000685
# URL: github.com/oschwengers/bakta
##sequence-region NC_002127.1 1 1330
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.hypotheticals.tsv
--- a/test-data/TEST_2/TEST_2.hypotheticals.tsv Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#Annotated with Bakta v1.5.0, https://github.com/oschwengers/bakta
-#Database v4.0, https://doi.org/10.5281/zenodo.4247252
-#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
-NC_002127.1 413 736 + IHHALP_00005 12.1 10.4
-NC_002127.1 971 141 - IHHALP_00010 18.9 7.7
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.json
--- a/test-data/TEST_2/TEST_2.json Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-{
- "genome": {
- "genus": "Escherichia",
- "species": "coli o157:h7",
- "strain": "Sakai",
- "plasmid": "pOSAK1",
- "complete": true,
- "gram": "?",
- "translation_table": 11
- },
- "stats": {
- "no_sequences": 1,
- "size": 1330,
- "gc": 0.4518796992481203,
- "n_ratio": 0.0,
- "n50": 1330,
- "coding_ratio": 0.6203007518796992
- },
- "features": [
- {
- "type": "cds",
- "contig": "NC_002127.1",
- "start": 413,
- "stop": 736,
- "strand": "+",
- "gene": null,
- "product": "hypothetical protein",
- "start_type": "ATG",
- "rbs_motif": "GGAG/GAGG",
- "db_xrefs": [],
- "frame": 2,
- "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
- "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
- "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
- "hypothetical": true,
- "seq_stats": {
- "molecular_weight": 12072.90819999999,
- "isoelectric_point": 10.367886161804197
- },
- "id": "IHHALPPJCH_1",
- "locus": "IHHALP_00005"
- },
- {
- "type": "cds",
- "contig": "NC_002127.1",
- "start": 971,
- "stop": 141,
- "strand": "-",
- "gene": null,
- "product": "hypothetical protein",
- "start_type": "ATG",
- "rbs_motif": "AGGA/GGAG/GAGG",
- "db_xrefs": [],
- "frame": 1,
- "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
- "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
- "edge": true,
- "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
- "hypothetical": true,
- "seq_stats": {
- "molecular_weight": 18866.325799999995,
- "isoelectric_point": 7.696590614318848
- },
- "id": "IHHALPPJCH_2",
- "locus": "IHHALP_00010"
- }
- ],
- "sequences": [
- {
- "id": "NC_002127.1",
- "description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
- "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
- "length": 1330,
- "complete": true,
- "type": "plasmid",
- "topology": "circular",
- "simple_id": "contig_1",
- "name": "pOSAK1"
- }
- ],
- "run": {
- "start": "2022-09-16 07:32:09",
- "end": "2022-09-16 07:32:10"
- },
- "version": {
- "bakta": "1.5.0",
- "db": "4.0"
- }
-}
\ No newline at end of file
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.log
--- a/test-data/TEST_2/TEST_2.log Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-parse genome sequences...
- imported: 1
- filtered & revised: 1
- plasmids: 1
-
-start annotation...
-skip tRNA prediction...
-skip tmRNA prediction...
-predict rRNAs...
- found: 0
-predict ncRNAs...
- found: 0
-predict ncRNA regions...
- found: 0
-predict CRISPR arrays...
- found: 0
-predict & annotate CDSs...
- predicted: 2
- discarded spurious: 0
- revised translational exceptions: 0
- detected IPSs: 0
- found PSCs: 0
- found PSCCs: 0
- lookup annotations...
- conduct expert systems...
- amrfinder: 0
- protein sequences: 0
- combine annotations and mark hypotheticals...
- detect pseudogenes...
- pseudogene candidates: 0
- found pseudogenes: 0
-analyze hypothetical proteins: 2
- detected Pfam hits: 0
- calculated proteins statistics
- revise special cases...
-extract sORF...
- potential: 22
- discarded due to overlaps: 2
- discarded spurious: 0
- detected IPSs: 0
- found PSCs: 0
- lookup annotations...
- filter and combine annotations...
- filtered sORFs: 0
-detect gaps...
- found: 0
-detect oriCs/oriVs...
- found: 0
-detect oriTs...
- found: 0
-apply feature overlap filters...
-select features and create locus tags...
-selected: 2
-
-genome statistics:
- Genome size: 1,330 bp
- Contigs/replicons: 1
- GC: 45.2 %
- N50: 1,330
- N ratio: 0.0 %
- coding density: 62.0 %
-
-annotation summary:
- tRNAs: 0
- tmRNAs: 0
- rRNAs: 0
- ncRNAs: 0
- ncRNA regions: 0
- CRISPR arrays: 0
- CDSs: 2
- hypotheticals: 2
- pseudogenes: 0
- signal peptides: 0
- sORFs: 0
- gaps: 0
- oriCs/oriVs: 0
- oriTs: 0
-
-export annotation results to: /tmp/tmpmnqj1xog/job_working_directory/000/4/working
- human readable TSV...
- GFF3...
- INSDC GenBank & EMBL...
- genome sequences...
- feature nucleotide sequences...
- translated CDS sequences...
- circular genome plot...
- hypothetical TSV...
- translated hypothetical CDS sequences...
- machine readable JSON...
- genome and annotation summary...
-
-If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
-Annotation successfully finished in 0:00 [mm:ss].
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.svg
--- a/test-data/TEST_2/TEST_2.svg Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2400 +0,0 @@
-
-
-
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.tsv
--- a/test-data/TEST_2/TEST_2.tsv Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_2/TEST_2.tsv Mon Jul 24 11:22:16 2023 +0000
@@ -1,5 +1,8 @@
-#Annotated with Bakta (v1.5.0): https://github.com/oschwengers/bakta
-#Database (v4.0): https://doi.org/10.5281/zenodo.4247252
+# Annotated with Bakta
+# Software: v1.8.1
+# Database: v5.0, full
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
NC_002127.1 cds 413 736 + IHHALP_00005 hypothetical protein
NC_002127.1 cds 971 141 - IHHALP_00010 hypothetical protein
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2.txt
--- a/test-data/TEST_2/TEST_2.txt Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-Sequence(s):
-Length: 1330
-Count: 1
-GC: 45.2
-N50: 1330
-N ratio: 0.0
-coding density: 62.0
-
-Annotation:
-tRNAs: 0
-tmRNAs: 0
-rRNAs: 0
-ncRNAs: 0
-ncRNA regions: 0
-CRISPR arrays: 0
-CDSs: 2
-pseudogenes: 0
-hypotheticals: 2
-signal peptides: 0
-sORFs: 0
-gaps: 0
-oriCs: 0
-oriVs: 0
-oriTs: 0
-
-Bakta:
-Software: v1.5.0
-Database: v4.0
-DOI: 10.1099/mgen.0.000685
-URL: github.com/oschwengers/bakta
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_2/TEST_2_plot.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_2/TEST_2_plot.svg Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,2400 @@
+
+
+
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_3/TEST_3.embl
--- a/test-data/TEST_3/TEST_3.embl Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-ID contig_1; ; circular; DNA; ; PRO; 1330 BP.
-XX
-AC contig_1;
-XX
-DE plasmid unnamed1, complete sequence
-XX
-OS .
-OC .
-XX
-CC Annotated with Bakta
-CC Software: v1.5.0
-CC Database: v4.0
-CC DOI: 10.1099/mgen.0.000685
-CC URL: github.com/oschwengers/bakta
-CC
-CC ##Genome Annotation Summary:##
-CC Annotation Date :: 09/16/2022, 07:32:21
-CC Annotation Pipeline :: Bakta
-CC Annotation Software version :: v1.5.0
-CC Annotation Database version :: v4.0
-CC CDSs :: 0
-CC tRNAs :: 0
-CC tmRNAs :: 0
-CC rRNAs :: 0
-CC ncRNAs :: 0
-CC regulatory ncRNAs :: 0
-CC CRISPR Arrays :: 0
-CC oriCs/oriVs :: 0
-CC oriTs :: 0
-CC gaps :: 0
-CC pseudogenes :: 0
-XX
-FH Key Location/Qualifiers
-FH
-FT source 1..1330
-FT /mol_type="genomic DNA"
-FT /plasmid="unnamed1"
-XX
-SQ Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
- ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc 60
- gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc 120
- agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg 180
- tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt 240
- tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt 300
- gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac 360
- cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa 420
- acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga 480
- agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt 540
- acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga 600
- agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga 660
- cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag 720
- gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt 780
- aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga 840
- tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc 900
- agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc 960
- tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa 1020
- ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat 1080
- cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat 1140
- taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa 1200
- aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc 1260
- tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg 1320
- cttctatttg 1330
-//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_3/TEST_3.gbff
--- a/test-data/TEST_3/TEST_3.gbff Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-LOCUS contig_1 1330 bp DNA circular BCT 16-SEP-2022
-DEFINITION plasmid unnamed1, complete sequence.
-ACCESSION contig_1
-VERSION contig_1
-KEYWORDS .
-SOURCE None
- ORGANISM .
- .
-COMMENT Annotated with Bakta
- Software: v1.5.0
- Database: v4.0
- DOI: 10.1099/mgen.0.000685
- URL: github.com/oschwengers/bakta
-
- ##Genome Annotation Summary:##
- Annotation Date :: 09/16/2022, 07:32:21
- Annotation Pipeline :: Bakta
- Annotation Software version :: v1.5.0
- Annotation Database version :: v4.0
- CDSs :: 0
- tRNAs :: 0
- tmRNAs :: 0
- rRNAs :: 0
- ncRNAs :: 0
- regulatory ncRNAs :: 0
- CRISPR Arrays :: 0
- oriCs/oriVs :: 0
- oriTs :: 0
- gaps :: 0
- pseudogenes :: 0
-FEATURES Location/Qualifiers
- source 1..1330
- /mol_type="genomic DNA"
- /plasmid="unnamed1"
-ORIGIN
- 1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
- 61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
- 121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
- 181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
- 241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
- 301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
- 361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
- 421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
- 481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
- 541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
- 601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
- 661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
- 721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
- 781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
- 841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
- 901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
- 961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
- 1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
- 1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
- 1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
- 1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
- 1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
- 1321 cttctatttg
-//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_3/TEST_3.gff3
--- a/test-data/TEST_3/TEST_3.gff3 Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_3/TEST_3.gff3 Mon Jul 24 11:22:16 2023 +0000
@@ -1,8 +1,8 @@
##gff-version 3
##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
# Annotated with Bakta
-# Software: v1.5.0
-# Database: v4.0
+# Software: v1.8.1
+# Database: v5.0, full
# DOI: 10.1099/mgen.0.000685
# URL: github.com/oschwengers/bakta
##sequence-region contig_1 1 1330
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_3/TEST_3.json
--- a/test-data/TEST_3/TEST_3.json Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-{
- "genome": {
- "genus": null,
- "species": null,
- "strain": null,
- "complete": true,
- "gram": "?",
- "translation_table": 11
- },
- "stats": {
- "no_sequences": 1,
- "size": 1330,
- "gc": 0.4518796992481203,
- "n_ratio": 0.0,
- "n50": 1330,
- "coding_ratio": 0.0
- },
- "features": [],
- "sequences": [
- {
- "id": "contig_1",
- "description": "[completeness=complete] [topology=circular] [gcode=11]",
- "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
- "length": 1330,
- "complete": true,
- "type": "plasmid",
- "topology": "circular",
- "simple_id": "contig_1",
- "orig_id": "NC_002127.1",
- "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
- "name": "unnamed1"
- }
- ],
- "run": {
- "start": "2022-09-16 07:32:20",
- "end": "2022-09-16 07:32:21"
- },
- "version": {
- "bakta": "1.5.0",
- "db": "4.0"
- }
-}
\ No newline at end of file
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_3/TEST_3.log
--- a/test-data/TEST_3/TEST_3.log Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-parse genome sequences...
- imported: 1
- filtered & revised: 1
- plasmids: 1
-
-start annotation...
-skip tRNA prediction...
-skip tmRNA prediction...
-skip rRNA prediction...
-skip ncRNA prediction...
-skip ncRNA region prediction...
-skip CRISPR array prediction...
-skip CDS prediction...
-skip sORF prediction...
-skip gap annotation...
-skip oriC/T annotation...
-apply feature overlap filters...
-select features and create locus tags...
-selected: 0
-
-genome statistics:
- Genome size: 1,330 bp
- Contigs/replicons: 1
- GC: 45.2 %
- N50: 1,330
- N ratio: 0.0 %
- coding density: 0.0 %
-
-annotation summary:
- tRNAs: 0
- tmRNAs: 0
- rRNAs: 0
- ncRNAs: 0
- ncRNA regions: 0
- CRISPR arrays: 0
- CDSs: 0
- hypotheticals: 0
- pseudogenes: 0
- signal peptides: 0
- sORFs: 0
- gaps: 0
- oriCs/oriVs: 0
- oriTs: 0
-
-export annotation results to: /tmp/tmpmnqj1xog/job_working_directory/000/6/working
- human readable TSV...
- GFF3...
- INSDC GenBank & EMBL...
- genome sequences...
- feature nucleotide sequences...
- translated CDS sequences...
- circular genome plot...
- machine readable JSON...
- genome and annotation summary...
-
-If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
-Annotation successfully finished in 0:00 [mm:ss].
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_3/TEST_3.svg
--- a/test-data/TEST_3/TEST_3.svg Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2397 +0,0 @@
-
-
-
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_3/TEST_3.tsv
--- a/test-data/TEST_3/TEST_3.tsv Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_3/TEST_3.tsv Mon Jul 24 11:22:16 2023 +0000
@@ -1,3 +1,6 @@
-#Annotated with Bakta (v1.5.0): https://github.com/oschwengers/bakta
-#Database (v4.0): https://doi.org/10.5281/zenodo.4247252
+# Annotated with Bakta
+# Software: v1.8.1
+# Database: v5.0, full
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.embl
--- a/test-data/TEST_4/TEST_4.embl Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-ID p2; ; circular; DNA; ; PRO; 1330 BP.
-XX
-AC p2;
-XX
-DE plasmid pOSAK1, complete sequence
-XX
-OS .
-OC .
-XX
-CC Annotated with Bakta
-CC Software: v1.5.0
-CC Database: v4.0
-CC DOI: 10.1099/mgen.0.000685
-CC URL: github.com/oschwengers/bakta
-CC
-CC ##Genome Annotation Summary:##
-CC Annotation Date :: 09/16/2022, 07:32:50
-CC Annotation Pipeline :: Bakta
-CC Annotation Software version :: v1.5.0
-CC Annotation Database version :: v4.0
-CC CDSs :: 2
-CC tRNAs :: 0
-CC tmRNAs :: 0
-CC rRNAs :: 0
-CC ncRNAs :: 0
-CC regulatory ncRNAs :: 0
-CC CRISPR Arrays :: 0
-CC oriCs/oriVs :: 0
-CC oriTs :: 0
-CC gaps :: 0
-CC pseudogenes :: 0
-XX
-FH Key Location/Qualifiers
-FH
-FT source 1..1330
-FT /mol_type="genomic DNA"
-FT /plasmid="pOSAK1"
-FT gene 413..736
-FT /locus_tag="IHHALP_00005"
-FT CDS 413..736
-FT /product="hypothetical protein"
-FT /locus_tag="IHHALP_00005"
-FT /protein_id="gnl|Bakta|IHHALP_00005"
-FT /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
-FT AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
-FT MAD"
-FT /codon_start=1
-FT /transl_table=4
-FT /inference="ab initio prediction:Prodigal:2.6"
-FT gene complement(join(971..1330,1..141))
-FT /locus_tag="IHHALP_00010"
-FT CDS complement(join(971..1330,1..141))
-FT /product="hypothetical protein"
-FT /locus_tag="IHHALP_00010"
-FT /protein_id="gnl|Bakta|IHHALP_00010"
-FT /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
-FT EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
-FT YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
-FT IPI"
-FT /codon_start=1
-FT /transl_table=4
-FT /inference="ab initio prediction:Prodigal:2.6"
-XX
-SQ Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
- ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc 60
- gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc 120
- agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg 180
- tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt 240
- tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt 300
- gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac 360
- cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa 420
- acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga 480
- agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt 540
- acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga 600
- agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga 660
- cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag 720
- gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt 780
- aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga 840
- tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc 900
- agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc 960
- tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa 1020
- ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat 1080
- cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat 1140
- taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa 1200
- aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc 1260
- tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg 1320
- cttctatttg 1330
-//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.gbff
--- a/test-data/TEST_4/TEST_4.gbff Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-LOCUS p2 1330 bp DNA circular BCT 16-SEP-2022
-DEFINITION plasmid pOSAK1, complete sequence.
-ACCESSION p2
-VERSION p2
-KEYWORDS .
-SOURCE None
- ORGANISM .
- .
-COMMENT Annotated with Bakta
- Software: v1.5.0
- Database: v4.0
- DOI: 10.1099/mgen.0.000685
- URL: github.com/oschwengers/bakta
-
- ##Genome Annotation Summary:##
- Annotation Date :: 09/16/2022, 07:32:50
- Annotation Pipeline :: Bakta
- Annotation Software version :: v1.5.0
- Annotation Database version :: v4.0
- CDSs :: 2
- tRNAs :: 0
- tmRNAs :: 0
- rRNAs :: 0
- ncRNAs :: 0
- regulatory ncRNAs :: 0
- CRISPR Arrays :: 0
- oriCs/oriVs :: 0
- oriTs :: 0
- gaps :: 0
- pseudogenes :: 0
-FEATURES Location/Qualifiers
- source 1..1330
- /mol_type="genomic DNA"
- /plasmid="pOSAK1"
- gene 413..736
- /locus_tag="IHHALP_00005"
- CDS 413..736
- /product="hypothetical protein"
- /locus_tag="IHHALP_00005"
- /protein_id="gnl|Bakta|IHHALP_00005"
- /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
- AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
- MAD"
- /codon_start=1
- /transl_table=4
- /inference="ab initio prediction:Prodigal:2.6"
- gene complement(join(971..1330,1..141))
- /locus_tag="IHHALP_00010"
- CDS complement(join(971..1330,1..141))
- /product="hypothetical protein"
- /locus_tag="IHHALP_00010"
- /protein_id="gnl|Bakta|IHHALP_00010"
- /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
- EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
- YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
- IPI"
- /codon_start=1
- /transl_table=4
- /inference="ab initio prediction:Prodigal:2.6"
-ORIGIN
- 1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
- 61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
- 121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
- 181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
- 241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
- 301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
- 361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
- 421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
- 481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
- 541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
- 601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
- 661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
- 721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
- 781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
- 841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
- 901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
- 961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
- 1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
- 1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
- 1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
- 1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
- 1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
- 1321 cttctatttg
-//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.gff3
--- a/test-data/TEST_4/TEST_4.gff3 Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_4/TEST_4.gff3 Mon Jul 24 11:22:16 2023 +0000
@@ -1,8 +1,8 @@
##gff-version 3
##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
# Annotated with Bakta
-# Software: v1.5.0
-# Database: v4.0
+# Software: v1.8.1
+# Database: v5.0, full
# DOI: 10.1099/mgen.0.000685
# URL: github.com/oschwengers/bakta
##sequence-region p2 1 1330
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.hypotheticals.tsv
--- a/test-data/TEST_4/TEST_4.hypotheticals.tsv Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#Annotated with Bakta v1.5.0, https://github.com/oschwengers/bakta
-#Database v4.0, https://doi.org/10.5281/zenodo.4247252
-#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
-p2 413 736 + IHHALP_00005 12.1 10.4
-p2 971 141 - IHHALP_00010 18.9 7.7
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.json
--- a/test-data/TEST_4/TEST_4.json Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-{
- "genome": {
- "genus": null,
- "species": null,
- "strain": null,
- "complete": true,
- "gram": "?",
- "translation_table": 4
- },
- "stats": {
- "no_sequences": 1,
- "size": 1330,
- "gc": 0.4518796992481203,
- "n_ratio": 0.0,
- "n50": 1330,
- "coding_ratio": 0.6203007518796992
- },
- "features": [
- {
- "type": "cds",
- "contig": "p2",
- "start": 413,
- "stop": 736,
- "strand": "+",
- "gene": null,
- "product": "hypothetical protein",
- "start_type": "ATG",
- "rbs_motif": "GGAG/GAGG",
- "db_xrefs": [],
- "frame": 2,
- "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
- "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
- "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
- "hypothetical": true,
- "seq_stats": {
- "molecular_weight": 12072.90819999999,
- "isoelectric_point": 10.367886161804197
- },
- "id": "IHHALPPJCH_1",
- "locus": "IHHALP_00005"
- },
- {
- "type": "cds",
- "contig": "p2",
- "start": 971,
- "stop": 141,
- "strand": "-",
- "gene": null,
- "product": "hypothetical protein",
- "start_type": "ATG",
- "rbs_motif": "AGGA/GGAG/GAGG",
- "db_xrefs": [],
- "frame": 1,
- "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
- "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
- "edge": true,
- "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
- "hypothetical": true,
- "seq_stats": {
- "molecular_weight": 18866.325799999995,
- "isoelectric_point": 7.696590614318848
- },
- "id": "IHHALPPJCH_2",
- "locus": "IHHALP_00010"
- }
- ],
- "sequences": [
- {
- "id": "p2",
- "description": "[completeness=complete] [topology=circular] [gcode=4] [plasmid-name=pOSAK1]",
- "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
- "length": 1330,
- "complete": true,
- "type": "plasmid",
- "topology": "circular",
- "orig_id": "NC_002127.1",
- "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
- "name": "pOSAK1"
- }
- ],
- "run": {
- "start": "2022-09-16 07:32:48",
- "end": "2022-09-16 07:32:50"
- },
- "version": {
- "bakta": "1.5.0",
- "db": "4.0"
- }
-}
\ No newline at end of file
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.log
--- a/test-data/TEST_4/TEST_4.log Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-parse genome sequences...
- imported: 1
- filtered & revised: 1
- plasmids: 1
-
-start annotation...
-predict tRNAs...
- found: 0
-predict tmRNAs...
- found: 0
-predict rRNAs...
- found: 0
-predict ncRNAs...
- found: 0
-predict ncRNA regions...
- found: 0
-predict CRISPR arrays...
- found: 0
-predict & annotate CDSs...
- predicted: 2
- discarded spurious: 0
- revised translational exceptions: 0
- detected IPSs: 0
- found PSCs: 0
- found PSCCs: 0
- lookup annotations...
- conduct expert systems...
- amrfinder: 0
- protein sequences: 0
- user protein sequences: 0
- combine annotations and mark hypotheticals...
- detect pseudogenes...
- pseudogene candidates: 0
- found pseudogenes: 0
-analyze hypothetical proteins: 2
- detected Pfam hits: 0
- calculated proteins statistics
- revise special cases...
-extract sORF...
- potential: 16
- discarded due to overlaps: 2
- discarded spurious: 0
- detected IPSs: 0
- found PSCs: 0
- lookup annotations...
- filter and combine annotations...
- filtered sORFs: 0
-detect gaps...
- found: 0
-detect oriCs/oriVs...
- found: 0
-detect oriTs...
- found: 0
-apply feature overlap filters...
-select features and create locus tags...
-selected: 2
-
-genome statistics:
- Genome size: 1,330 bp
- Contigs/replicons: 1
- GC: 45.2 %
- N50: 1,330
- N ratio: 0.0 %
- coding density: 62.0 %
-
-annotation summary:
- tRNAs: 0
- tmRNAs: 0
- rRNAs: 0
- ncRNAs: 0
- ncRNA regions: 0
- CRISPR arrays: 0
- CDSs: 2
- hypotheticals: 2
- pseudogenes: 0
- signal peptides: 0
- sORFs: 0
- gaps: 0
- oriCs/oriVs: 0
- oriTs: 0
-
-export annotation results to: /tmp/tmpmnqj1xog/job_working_directory/000/12/working
- human readable TSV...
- GFF3...
- INSDC GenBank & EMBL...
- genome sequences...
- feature nucleotide sequences...
- translated CDS sequences...
- circular genome plot...
- hypothetical TSV...
- translated hypothetical CDS sequences...
- machine readable JSON...
- genome and annotation summary...
-
-If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
-Annotation successfully finished in 0:01 [mm:ss].
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.svg
--- a/test-data/TEST_4/TEST_4.svg Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2400 +0,0 @@
-
-
-
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.tsv
--- a/test-data/TEST_4/TEST_4.tsv Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_4/TEST_4.tsv Mon Jul 24 11:22:16 2023 +0000
@@ -1,5 +1,8 @@
-#Annotated with Bakta (v1.5.0): https://github.com/oschwengers/bakta
-#Database (v4.0): https://doi.org/10.5281/zenodo.4247252
+# Annotated with Bakta
+# Software: v1.8.1
+# Database: v5.0, full
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
p2 cds 413 736 + IHHALP_00005 hypothetical protein
p2 cds 971 141 - IHHALP_00010 hypothetical protein
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4.txt
--- a/test-data/TEST_4/TEST_4.txt Fri Feb 10 14:20:09 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-Sequence(s):
-Length: 1330
-Count: 1
-GC: 45.2
-N50: 1330
-N ratio: 0.0
-coding density: 62.0
-
-Annotation:
-tRNAs: 0
-tmRNAs: 0
-rRNAs: 0
-ncRNAs: 0
-ncRNA regions: 0
-CRISPR arrays: 0
-CDSs: 2
-pseudogenes: 0
-hypotheticals: 2
-signal peptides: 0
-sORFs: 0
-gaps: 0
-oriCs: 0
-oriVs: 0
-oriTs: 0
-
-Bakta:
-Software: v1.5.0
-Database: v4.0
-DOI: 10.1099/mgen.0.000685
-URL: github.com/oschwengers/bakta
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_4/TEST_4_plot.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_4/TEST_4_plot.svg Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,2400 @@
+
+
+
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_5/TEST_5.log
--- a/test-data/TEST_5/TEST_5.log Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_5/TEST_5.log Mon Jul 24 11:22:16 2023 +0000
@@ -1,3 +1,25 @@
+Bakta v1.8.1
+Options and arguments:
+ input: /tmp/tmp6wdeee6u/files/0/2/9/dataset_0293aae0-943d-4d1b-af6c-e5ed41b419da.dat
+ db: /tmp/tmp6wdeee6u/job_working_directory/000/13/working/database_path, version 5.0, full
+ output: /tmp/tmp6wdeee6u/job_working_directory/000/13/working/bakta_output
+ tmp directory: /tmp/tmp6wdeee6u/tmp/tmpigp60rnb
+ prefix: bakta_output
+ threads: 1
+ translation table: 4
+ complete replicons: True
+ skip tRNA: True
+ skip tmRNA: True
+ skip rRNA: True
+ skip ncRNA: True
+ skip ncRNA region: True
+ skip CRISPR: True
+ skip CDS: True
+ skip sORF: True
+ skip gap: True
+ skip oriC/V/T: True
+ skip plot: True
+
parse genome sequences...
imported: 1
filtered & revised: 1
@@ -17,6 +39,8 @@
apply feature overlap filters...
select features and create locus tags...
selected: 0
+improve annotations...
+ revised gene symbols: 0
genome statistics:
Genome size: 1,330 bp
@@ -34,22 +58,22 @@
ncRNA regions: 0
CRISPR arrays: 0
CDSs: 0
- hypotheticals: 0
- pseudogenes: 0
- signal peptides: 0
+ hypotheticals: 0
+ pseudogenes: 0
+ signal peptides: 0
sORFs: 0
gaps: 0
oriCs/oriVs: 0
oriTs: 0
-export annotation results to: /tmp/tmpmnqj1xog/job_working_directory/000/14/working
+export annotation results to: /tmp/tmp6wdeee6u/job_working_directory/000/13/working/bakta_output
human readable TSV...
GFF3...
INSDC GenBank & EMBL...
genome sequences...
feature nucleotide sequences...
translated CDS sequences...
- circular genome plot...
+ skip generation of circular genome plot...
machine readable JSON...
genome and annotation summary...
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_5/TEST_5.txt
--- a/test-data/TEST_5/TEST_5.txt Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/TEST_5/TEST_5.txt Mon Jul 24 11:22:16 2023 +0000
@@ -24,7 +24,7 @@
oriTs: 0
Bakta:
-Software: v1.5.0
-Database: v4.0
+Software: v1.8.1
+Database: v5.0, full
DOI: 10.1099/mgen.0.000685
URL: github.com/oschwengers/bakta
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.embl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.embl Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,85 @@
+ID contig_1; ; circular; DNA; ; PRO; 1330 BP.
+XX
+AC contig_1;
+XX
+DE plasmid unnamed1, complete sequence
+XX
+OS .
+OC .
+XX
+CC Annotated with Bakta
+CC Software: v1.8.1
+CC Database: v5.0, full
+CC DOI: 10.1099/mgen.0.000685
+CC URL: github.com/oschwengers/bakta
+CC
+CC ##Genome Annotation Summary:##
+CC Annotation Date :: 06/19/2023, 09:31:33
+CC CDSs :: 2
+CC tRNAs :: 0
+CC tmRNAs :: 0
+CC rRNAs :: 0
+CC ncRNAs :: 0
+CC regulatory ncRNAs :: 0
+CC CRISPR Arrays :: 0
+CC oriCs/oriVs :: 0
+CC oriTs :: 0
+CC gaps :: 0
+CC pseudogenes :: 0
+XX
+FH Key Location/Qualifiers
+FH
+FT source 1..1330
+FT /mol_type="genomic DNA"
+FT /plasmid="unnamed1"
+FT gene 413..736
+FT /locus_tag="IHHALP_00005"
+FT CDS 413..736
+FT /product="hypothetical protein"
+FT /locus_tag="IHHALP_00005"
+FT /protein_id="gnl|Bakta|IHHALP_00005"
+FT /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+FT AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+FT MAD"
+FT /codon_start=1
+FT /transl_table=11
+FT /inference="ab initio prediction:Prodigal:2.6"
+FT gene complement(join(971..1330,1..141))
+FT /locus_tag="IHHALP_00010"
+FT CDS complement(join(971..1330,1..141))
+FT /product="hypothetical protein"
+FT /locus_tag="IHHALP_00010"
+FT /protein_id="gnl|Bakta|IHHALP_00010"
+FT /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+FT EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+FT YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+FT IPI"
+FT /codon_start=1
+FT /transl_table=11
+FT /inference="ab initio prediction:Prodigal:2.6"
+XX
+SQ Sequence 1330 BP; 330 A; 291 C; 310 G; 399 T; 0 other;
+ ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc 60
+ gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc 120
+ agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg 180
+ tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt 240
+ tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt 300
+ gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac 360
+ cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa 420
+ acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga 480
+ agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt 540
+ acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga 600
+ agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga 660
+ cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag 720
+ gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt 780
+ aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga 840
+ tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc 900
+ agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc 960
+ tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa 1020
+ ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat 1080
+ cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat 1140
+ taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa 1200
+ aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc 1260
+ tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg 1320
+ cttctatttg 1330
+//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.faa Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.ffn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.ffn Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG
+>IHHALP_00010 hypothetical protein
+ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.fna Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,24 @@
+>contig_1 [gcode=11] [completeness=complete] [topology=circular] [plasmid-name=unnamed1]
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.gbff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.gbff Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,81 @@
+LOCUS contig_1 1330 bp DNA circular BCT 19-JUN-2023
+DEFINITION plasmid unnamed1, complete sequence.
+ACCESSION contig_1
+VERSION contig_1
+KEYWORDS .
+SOURCE None
+ ORGANISM .
+ .
+COMMENT Annotated with Bakta
+ Software: v1.8.1
+ Database: v5.0, full
+ DOI: 10.1099/mgen.0.000685
+ URL: github.com/oschwengers/bakta
+
+ ##Genome Annotation Summary:##
+ Annotation Date :: 06/19/2023, 09:31:33
+ CDSs :: 2
+ tRNAs :: 0
+ tmRNAs :: 0
+ rRNAs :: 0
+ ncRNAs :: 0
+ regulatory ncRNAs :: 0
+ CRISPR Arrays :: 0
+ oriCs/oriVs :: 0
+ oriTs :: 0
+ gaps :: 0
+ pseudogenes :: 0
+FEATURES Location/Qualifiers
+ source 1..1330
+ /mol_type="genomic DNA"
+ /plasmid="unnamed1"
+ gene 413..736
+ /locus_tag="IHHALP_00005"
+ CDS 413..736
+ /product="hypothetical protein"
+ /locus_tag="IHHALP_00005"
+ /protein_id="gnl|Bakta|IHHALP_00005"
+ /translation="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA
+ AALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRL
+ MAD"
+ /codon_start=1
+ /transl_table=11
+ /inference="ab initio prediction:Prodigal:2.6"
+ gene complement(join(971..1330,1..141))
+ /locus_tag="IHHALP_00010"
+ CDS complement(join(971..1330,1..141))
+ /product="hypothetical protein"
+ /locus_tag="IHHALP_00010"
+ /protein_id="gnl|Bakta|IHHALP_00010"
+ /translation="MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELA
+ EEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDR
+ YVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKG
+ IPI"
+ /codon_start=1
+ /transl_table=11
+ /inference="ab initio prediction:Prodigal:2.6"
+ORIGIN
+ 1 ttcttctgcg agttcgtgca gcttctcaca catggtggcc tgctcgtcag catcgagtgc
+ 61 gtccagtttt tcgagcagcg tcaggctctg gctttttatg aatcccgcca tgttgagtgc
+ 121 agtttgctgc tgcttgttca tctttctgtt ttctccgttc tgtctgtcat ctgcgtcgtg
+ 181 tgattatatc gcgcaccact tttcgaccgt cttaccgccg gtattctgcc gacggacatt
+ 241 tcagtcagac aacactgtca ctgccaaaaa acagcagtgc tttgttggta attcgaactt
+ 301 gcagacagga caggatgtgc aattgttata ccgcgcatac atgcacgcta ttacaattac
+ 361 cctggtcagg gcttcgcccc gacaccccat gtcagatacg gagccatgtt ttatgacaaa
+ 421 acgaagtgga agtaatacgc gcaggcgggc tatcagtcgc cctgttcgtc tgacggcaga
+ 481 agaagaccag gaaatcagaa aaagggctgc tgaatgcggc aagaccgttt ctggtttttt
+ 541 acgggcggca gctctcggta agaaagttaa ctcactgact gatgaccggg tgctgaaaga
+ 601 agttatgcga ctgggggcgt tgcagaaaaa actctttatc gacggcaagc gtgtcgggga
+ 661 cagagagtat gcggaggtgc tgatcgctat tacggagtat caccgtgccc tgttatccag
+ 721 gcttatggca gattagcttc ccggagagaa actgtcgaaa acagacggta tgaacgccgt
+ 781 aagcccccaa accgatcgcc attcactttc atgcatagct atgcagtgag ctgaaagcga
+ 841 tcctgacgca tttttccggt ttaccccggg gaaaacatct ctttttgcgg tgtctgcgtc
+ 901 agaatcgcgt tcagcgcgtt ttggcggtgc gcgtaatgag acgttatggt aaatgtcttc
+ 961 tggcttgata ttatattgga atgccttttt tcaaagcaaa tgatgtggct ttggatagaa
+ 1021 ggtttacgtt gatcttatca aagttttttt taaagaacga agccgagagc tcagataaat
+ 1081 cattatattc atcagttttc gtaactttgt ttaatgtgta acttgaaaac ttctcgccat
+ 1141 taaatgacgt atagacgtaa cgatcttttt ttccaccgtt aggaattatt aaatcaaaaa
+ 1201 aaacatcacc cttgcttttc tttttcttca agtcggattc gatttttgag aaaaattcgc
+ 1261 tcgggctata aatatcagta gcatagacaa taaataaagt tttatcttta ttttttattg
+ 1321 cttctatttg
+//
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.gff3 Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,36 @@
+##gff-version 3
+##feature-ontology https://github.com/The-Sequence-Ontology/SO-Ontologies/blob/v3.1/so.obo
+# Annotated with Bakta
+# Software: v1.8.1
+# Database: v5.0, full
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+##sequence-region contig_1 1 1330
+contig_1 Bakta region 1 1330 . + . ID=contig_1;Name=contig_1;Is_circular=true
+contig_1 Prodigal CDS 413 736 . + 0 ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein
+contig_1 Prodigal CDS 971 1471 . - 0 ID=IHHALP_00010;Name=hypothetical protein;locus_tag=IHHALP_00010;product=hypothetical protein
+##FASTA
+>contig_1
+TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC
+GTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGC
+AGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTG
+TGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATT
+TCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTT
+GCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTAC
+CCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAA
+ACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGA
+AGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTT
+ACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGA
+AGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGA
+CAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAG
+GCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGT
+AAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGA
+TCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTC
+AGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTC
+TGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAA
+GGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAAT
+CATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCAT
+TAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAA
+AAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGC
+TCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTG
+CTTCTATTTG
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.hypotheticals.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.hypotheticals.faa Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,4 @@
+>IHHALP_00005 hypothetical protein
+MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
+>IHHALP_00010 hypothetical protein
+MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.hypotheticals.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.hypotheticals.tsv Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,5 @@
+#Annotated with Bakta v1.8.1, https://github.com/oschwengers/bakta
+#Database v5.0, https://doi.org/10.5281/zenodo.4247252
+#Sequence Id Start Stop Strand Locus Tag Mol Weight [kDa] Iso El. Point Pfam hits Dbxrefs
+contig_1 413 736 + IHHALP_00005 12.1 10.4
+contig_1 971 141 - IHHALP_00010 18.9 7.7
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.json Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,95 @@
+{
+ "genome": {
+ "genus": null,
+ "species": null,
+ "strain": null,
+ "complete": true,
+ "gram": "?",
+ "translation_table": 11
+ },
+ "stats": {
+ "no_sequences": 1,
+ "size": 1330,
+ "gc": 0.4518796992481203,
+ "n_ratio": 0.0,
+ "n50": 1330,
+ "coding_ratio": 0.6203007518796992
+ },
+ "features": [
+ {
+ "type": "cds",
+ "contig": "contig_1",
+ "start": 413,
+ "stop": 736,
+ "strand": "+",
+ "gene": null,
+ "product": "hypothetical protein",
+ "start_type": "ATG",
+ "rbs_motif": "GGAG/GAGG",
+ "db_xrefs": [],
+ "frame": 2,
+ "aa": "MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVMRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD",
+ "aa_hexdigest": "d9bdebc84195542e775c3d22458b507e",
+ "nt": "ATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAG",
+ "hypothetical": true,
+ "genes": [],
+ "seq_stats": {
+ "molecular_weight": 12072.90819999999,
+ "isoelectric_point": 10.367886161804197
+ },
+ "id": "IHHALPPJCH_1",
+ "locus": "IHHALP_00005"
+ },
+ {
+ "type": "cds",
+ "contig": "contig_1",
+ "start": 971,
+ "stop": 141,
+ "strand": "-",
+ "gene": null,
+ "product": "hypothetical protein",
+ "start_type": "ATG",
+ "rbs_motif": "AGGA/GGAG/GAGG",
+ "db_xrefs": [],
+ "frame": 1,
+ "aa": "MNKQQQTALNMAGFIKSQSLTLLEKLDALDADEQATMCEKLHELAEEQIEAIKNKDKTLFIVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSFNGEKFSSYTLNKVTKTDEYNDLSELSASFFKKNFDKINVNLLSKATSFALKKGIPI",
+ "aa_hexdigest": "1e7027cbe48346e06a83e802a9385584",
+ "edge": true,
+ "nt": "ATGAACAAGCAGCAGCAAACTGCACTCAACATGGCGGGATTCATAAAAAGCCAGAGCCTGACGCTGCTCGAAAAACTGGACGCACTCGATGCTGACGAGCAGGCCACCATGTGTGAGAAGCTGCACGAACTCGCAGAAGAACAAATAGAAGCAATAAAAAATAAAGATAAAACTTTATTTATTGTCTATGCTACTGATATTTATAGCCCGAGCGAATTTTTCTCAAAAATCGAATCCGACTTGAAGAAAAAGAAAAGCAAGGGTGATGTTTTTTTTGATTTAATAATTCCTAACGGTGGAAAAAAAGATCGTTACGTCTATACGTCATTTAATGGCGAGAAGTTTTCAAGTTACACATTAAACAAAGTTACGAAAACTGATGAATATAATGATTTATCTGAGCTCTCGGCTTCGTTCTTTAAAAAAAACTTTGATAAGATCAACGTAAACCTTCTATCCAAAGCCACATCATTTGCTTTGAAAAAAGGCATTCCAATATAA",
+ "hypothetical": true,
+ "genes": [],
+ "seq_stats": {
+ "molecular_weight": 18866.325799999995,
+ "isoelectric_point": 7.696590614318848
+ },
+ "id": "IHHALPPJCH_2",
+ "locus": "IHHALP_00010"
+ }
+ ],
+ "sequences": [
+ {
+ "id": "contig_1",
+ "description": "[gcode=11] [completeness=complete] [topology=circular] [plasmid-name=unnamed1]",
+ "sequence": "TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGCGTCCAGTTTTTCGAGCAGCGTCAGGCTCTGGCTTTTTATGAATCCCGCCATGTTGAGTGCAGTTTGCTGCTGCTTGTTCATCTTTCTGTTTTCTCCGTTCTGTCTGTCATCTGCGTCGTGTGATTATATCGCGCACCACTTTTCGACCGTCTTACCGCCGGTATTCTGCCGACGGACATTTCAGTCAGACAACACTGTCACTGCCAAAAAACAGCAGTGCTTTGTTGGTAATTCGAACTTGCAGACAGGACAGGATGTGCAATTGTTATACCGCGCATACATGCACGCTATTACAATTACCCTGGTCAGGGCTTCGCCCCGACACCCCATGTCAGATACGGAGCCATGTTTTATGACAAAACGAAGTGGAAGTAATACGCGCAGGCGGGCTATCAGTCGCCCTGTTCGTCTGACGGCAGAAGAAGACCAGGAAATCAGAAAAAGGGCTGCTGAATGCGGCAAGACCGTTTCTGGTTTTTTACGGGCGGCAGCTCTCGGTAAGAAAGTTAACTCACTGACTGATGACCGGGTGCTGAAAGAAGTTATGCGACTGGGGGCGTTGCAGAAAAAACTCTTTATCGACGGCAAGCGTGTCGGGGACAGAGAGTATGCGGAGGTGCTGATCGCTATTACGGAGTATCACCGTGCCCTGTTATCCAGGCTTATGGCAGATTAGCTTCCCGGAGAGAAACTGTCGAAAACAGACGGTATGAACGCCGTAAGCCCCCAAACCGATCGCCATTCACTTTCATGCATAGCTATGCAGTGAGCTGAAAGCGATCCTGACGCATTTTTCCGGTTTACCCCGGGGAAAACATCTCTTTTTGCGGTGTCTGCGTCAGAATCGCGTTCAGCGCGTTTTGGCGGTGCGCGTAATGAGACGTTATGGTAAATGTCTTCTGGCTTGATATTATATTGGAATGCCTTTTTTCAAAGCAAATGATGTGGCTTTGGATAGAAGGTTTACGTTGATCTTATCAAAGTTTTTTTTAAAGAACGAAGCCGAGAGCTCAGATAAATCATTATATTCATCAGTTTTCGTAACTTTGTTTAATGTGTAACTTGAAAACTTCTCGCCATTAAATGACGTATAGACGTAACGATCTTTTTTTCCACCGTTAGGAATTATTAAATCAAAAAAAACATCACCCTTGCTTTTCTTTTTCTTCAAGTCGGATTCGATTTTTGAGAAAAATTCGCTCGGGCTATAAATATCAGTAGCATAGACAATAAATAAAGTTTTATCTTTATTTTTTATTGCTTCTATTTG",
+ "length": 1330,
+ "complete": true,
+ "type": "plasmid",
+ "topology": "circular",
+ "simple_id": "contig_1",
+ "orig_id": "NC_002127.1",
+ "orig_description": "Escherichia coli O157:H7 str. Sakai plasmid pOSAK1, complete sequence",
+ "name": "unnamed1"
+ }
+ ],
+ "run": {
+ "start": "2023-06-19 09:31:32",
+ "end": "2023-06-19 09:31:33"
+ },
+ "version": {
+ "bakta": "1.8.1",
+ "db": {
+ "version": "5.0",
+ "type": "full"
+ }
+ }
+}
\ No newline at end of file
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.log Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,107 @@
+Bakta v1.8.1
+Options and arguments:
+ input: /tmp/tmp6wdeee6u/files/4/2/7/dataset_4276a0b6-bcea-42e6-a1e2-9f880300f5a1.dat
+ db: /tmp/tmp6wdeee6u/job_working_directory/000/15/working/database_path, version 5.0, full
+ output: /tmp/tmp6wdeee6u/job_working_directory/000/15/working/bakta_output
+ tmp directory: /tmp/tmp6wdeee6u/tmp/tmpkzvj8z8k
+ prefix: bakta_output
+ threads: 1
+ translation table: 11
+
+parse genome sequences...
+ imported: 1
+ filtered & revised: 1
+ plasmids: 1
+
+start annotation...
+predict tRNAs...
+ found: 0
+predict tmRNAs...
+ found: 0
+predict rRNAs...
+ found: 0
+predict ncRNAs...
+ found: 0
+predict ncRNA regions...
+ found: 0
+predict CRISPR arrays...
+ found: 0
+predict & annotate CDSs...
+ predicted: 2
+ discarded spurious: 0
+ revised translational exceptions: 0
+ detected IPSs: 0
+ found PSCs: 0
+ found PSCCs: 0
+ lookup annotations...
+ conduct expert systems...
+ amrfinder: 0
+ protein sequences: 0
+ combine annotations and mark hypotheticals...
+ detect pseudogenes...
+ pseudogene candidates: 0
+ found pseudogenes: 0
+analyze hypothetical proteins: 2
+ detected Pfam hits: 0
+ calculated proteins statistics
+ revise special cases...
+extract sORF...
+ potential: 22
+ discarded due to overlaps: 2
+ discarded spurious: 0
+ detected IPSs: 0
+ found PSCs: 0
+ lookup annotations...
+ filter and combine annotations...
+ filtered sORFs: 0
+detect gaps...
+ found: 0
+detect oriCs/oriVs...
+ found: 0
+detect oriTs...
+ found: 0
+apply feature overlap filters...
+select features and create locus tags...
+selected: 2
+improve annotations...
+ revised gene symbols: 0
+
+genome statistics:
+ Genome size: 1,330 bp
+ Contigs/replicons: 1
+ GC: 45.2 %
+ N50: 1,330
+ N ratio: 0.0 %
+ coding density: 62.0 %
+
+annotation summary:
+ tRNAs: 0
+ tmRNAs: 0
+ rRNAs: 0
+ ncRNAs: 0
+ ncRNA regions: 0
+ CRISPR arrays: 0
+ CDSs: 2
+ hypotheticals: 2
+ pseudogenes: 0
+ signal peptides: 0
+ sORFs: 0
+ gaps: 0
+ oriCs/oriVs: 0
+ oriTs: 0
+
+export annotation results to: /tmp/tmp6wdeee6u/job_working_directory/000/15/working/bakta_output
+ human readable TSV...
+ GFF3...
+ INSDC GenBank & EMBL...
+ genome sequences...
+ feature nucleotide sequences...
+ translated CDS sequences...
+ circular genome plot...
+ hypothetical TSV...
+ translated hypothetical CDS sequences...
+ machine readable JSON...
+ genome and annotation summary...
+
+If you use these results please cite Bakta: https://doi.org/10.1099/mgen.0.000685
+Annotation successfully finished in 0:01 [mm:ss].
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.tsv Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,8 @@
+# Annotated with Bakta
+# Software: v1.8.1
+# Database: v5.0, full
+# DOI: 10.1099/mgen.0.000685
+# URL: github.com/oschwengers/bakta
+#Sequence Id Type Start Stop Strand Locus Tag Gene Product DbXrefs
+contig_1 cds 413 736 + IHHALP_00005 hypothetical protein
+contig_1 cds 971 141 - IHHALP_00010 hypothetical protein
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6.txt Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,30 @@
+Sequence(s):
+Length: 1330
+Count: 1
+GC: 45.2
+N50: 1330
+N ratio: 0.0
+coding density: 62.0
+
+Annotation:
+tRNAs: 0
+tmRNAs: 0
+rRNAs: 0
+ncRNAs: 0
+ncRNA regions: 0
+CRISPR arrays: 0
+CDSs: 2
+pseudogenes: 0
+hypotheticals: 2
+signal peptides: 0
+sORFs: 0
+gaps: 0
+oriCs: 0
+oriVs: 0
+oriTs: 0
+
+Bakta:
+Software: v1.8.1
+Database: v5.0, full
+DOI: 10.1099/mgen.0.000685
+URL: github.com/oschwengers/bakta
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/TEST_6/TEST_6_plot.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TEST_6/TEST_6_plot.svg Mon Jul 24 11:22:16 2023 +0000
@@ -0,0 +1,2400 @@
+
+
+
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/bakta_database.loc
--- a/test-data/bakta_database.loc Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/bakta_database.loc Mon Jul 24 11:22:16 2023 +0000
@@ -5,7 +5,6 @@
# path : final oath to bakta database on galaxy
# value, dbkey, bakta_version, path
# eg.
-# 7025248 V4.0_2022-08-29 1.5 path/to/db
V0.0_date_test 7190015 1.5 ${__HERE__}/test-db
-V0.1_2022-08-29 7197216 1.5 ${__HERE__}/test-db
-V0.2_2022-08-19 7197217 1.7 ${__HERE__}/test-db
+V4.0_2022-08-29 7197216 1.5 ${__HERE__}/test-db
+V5.0_2022-08-19 7197217 1.7 ${__HERE__}/test-db
diff -r 865ece5ca178 -r 3f0aa1b3e816 test-data/test-db/version.json
--- a/test-data/test-db/version.json Fri Feb 10 14:20:09 2023 +0000
+++ b/test-data/test-db/version.json Mon Jul 24 11:22:16 2023 +0000
@@ -1,7 +1,8 @@
{
- "date": "2022-08-25",
- "major": 4,
+ "date": "2023-02-20",
+ "major": 5,
"minor": 0,
+ "type": "full",
"dependencies": [
{
"name": "AMRFinderPlus",
diff -r 865ece5ca178 -r 3f0aa1b3e816 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Fri Feb 10 14:20:09 2023 +0000
+++ b/tool_data_table_conf.xml.sample Mon Jul 24 11:22:16 2023 +0000
@@ -2,6 +2,10 @@
value, dbkey, bakta_version, path
-
+
+
+
diff -r 865ece5ca178 -r 3f0aa1b3e816 tool_data_table_conf.xml.test
--- a/tool_data_table_conf.xml.test Fri Feb 10 14:20:09 2023 +0000
+++ b/tool_data_table_conf.xml.test Mon Jul 24 11:22:16 2023 +0000
@@ -7,6 +7,5 @@