changeset 0:12870a79d56b draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics commit 3b5d66e71ee273061f579c8715fc085ea9d0b99e
author ecology
date Fri, 26 Jul 2024 14:31:32 +0000
parents
children 9d689f8c9ce4
files sanntis.xml test-data/BGC0001472.fna.prodigal.faa.gb test-data/BGC0001472.fna.prodigal.faa.ip.tsv test-data/Sanntis_output_data.gff3
diffstat 4 files changed, 291 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sanntis.xml	Fri Jul 26 14:31:32 2024 +0000
@@ -0,0 +1,53 @@
+<tool id="sanntis_marine" name="Sanntis biosynthetic gene clusters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
+    <description>in genomic and metagenomic data</description>
+    <macros>
+        <token name="@TOOL_VERSION@">0.9.3.5</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <edam_topics>
+        <edam_topic>topic_3387</edam_topic>
+    </edam_topics>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">sanntis</requirement>
+    </requirements>
+    <!-- The command writes output_sanntis.gff as a relative path; the data output below collects that same file via from_work_dir, so the two names must stay in sync. -->
+    <command detect_errors="exit_code"><![CDATA[
+        sanntis --ip-file '$input_interpro' --outfile 'output_sanntis.gff' '$input_genbank'
+    ]]></command>
+    <inputs>
+        <param name="input_interpro" type="data" format="tabular" label="Input the TSV file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/>
+        <param name="input_genbank" type="data" format="genbank" label="Input a Genbank .gb file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/>
+    </inputs>
+    <outputs>
+        <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_interpro" value="BGC0001472.fna.prodigal.faa.ip.tsv"/>
+            <param name="input_genbank" value="BGC0001472.fna.prodigal.faa.gb"/>
+            <output name="output_sanntis" value="Sanntis_output_data.gff3"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+SanntiS: SMBGC Annotation using Neural Networks Trained on Interpro Signatures.
+A tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data.
+
+.....
+
+**Input**
+
+- A TSV file produced by the InterProScan tool (run InterProScan first to obtain it).
+- A GenBank file whose structure fits the protein FASTA file that was used to run InterProScan.
+
+**Output**
+
+- A GFF3 file.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1101/2023.05.23.540769</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BGC0001472.fna.prodigal.faa.gb	Fri Jul 26 14:31:32 2024 +0000
@@ -0,0 +1,155 @@
+LOCUS       BGC0001472                32 bp    DNA              UNK 01-JAN-1980
+DEFINITION  BGC0001472.
+ACCESSION   BGC0001472
+VERSION     BGC0001472
+KEYWORDS    .
+SOURCE      .
+  ORGANISM  .
+            .
+FEATURES             Location/Qualifiers
+     CDS             312..683
+                     /translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP
+                     NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT
+                     QGVKNRKQARSRYGAKKEK"
+                     /protein_id="BGC0001472_1"
+     CDS             686..1156
+                     /translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG
+                     AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV
+                     VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW"
+                     /protein_id="BGC0001472_2"
+     CDS             1195..3324
+                     /translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH
+                     DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL
+                     DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV
+                     PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL
+                     EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV
+                     QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT
+                     FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT
+                     GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE
+                     TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG
+                     TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV
+                     GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV
+                     VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV
+                     PRNVAEEIIAKAKGE"
+                     /protein_id="BGC0001472_3"
+     CDS             3472..4665
+                     /translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE
+                     ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA
+                     ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY
+                     EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT
+                     ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL
+                     LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD
+                     VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK"
+                     /protein_id="BGC0001472_4"
+     CDS             4869..5570
+                     /translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS
+                     AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG
+                     FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA
+                     DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL
+                     LATTYARPPES"
+                     /protein_id="BGC0001472_5"
+     CDS             5567..7195
+                     /translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG
+                     ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA
+                     MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE
+                     VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP
+                     YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI
+                     DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY
+                     YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG
+                     VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP
+                     GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD
+                     ALARLLGTSPADEPVAAFASLGGTA"
+                     /protein_id="BGC0001472_6"
+     CDS             7210..7821
+                     /translation="MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPH
+                     ADVVVPVHAGGDPGLRDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRK
+                     RAAQHAGTARPYDMDAALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFN
+                     LVSGAVSSAVTVSVNRCPRCGGRFSQARADSAMPVPELLR"
+                     /protein_id="BGC0001472_7"
+     CDS             7845..9191
+                     /translation="MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQ
+                     LAYLGVPSRALPNLRTWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDE
+                     MVVAAENDLTEEFVSPSRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLV
+                     PAISVYLHMPYQSKSEEFIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRL
+                     PELVVDPARLDAGVRELHRVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVA
+                     ATCDVHPEQALGKIYRELASLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDV
+                     FGFLLDGERPAYGLEGMPGLPAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAV
+                     KVLVPEAMPVSFVHGERYLGTPRLYDAPRAMGHTSHAEDAVNPVQQPFA"
+                     /protein_id="BGC0001472_8"
+     CDS             9238..10437
+                     /translation="MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFAT
+                     IPQPVGSKYNDTFAPLIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSD
+                     EDSTRADLHRTAERYGCAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLV
+                     SFLNRAAETPQSPPPALGAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHF
+                     EAIYLRTKDPQSFDAACARFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESS
+                     HLAENFSDGSVVNAGHTLEDLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQT
+                     SLLYSCLYTLGFSLAERYVFCYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE
+                     "
+                     /protein_id="BGC0001472_9"
+     CDS             10511..10654
+                     /translation="MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSS
+                     SS"
+                     /protein_id="BGC0001472_10"
+     CDS             10977..13634
+                     /translation="MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGK
+                     NAVGLADRLGELVPTLGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVE
+                     TWCALGVRAEQCERAGREELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLV
+                     AGDEASALKPSKARLRESSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGG
+                     TQESVTTLNRLLVNWGPPGLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGR
+                     MATERVLRVRREGLFDALLAAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGIL
+                     FFRPEIDDHDPDYSMKLDRVLAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLD
+                     SAYAAIGGIAELCKVSPPPEEVLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLA
+                     SMMDNGQVKRLGLYSFATRVLGDRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERY
+                     TRQRAEALRTIRQRLVPGDGTVHLDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDR
+                     DRTLVVNGLLTGYGVYFSRFGSFVEGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFN
+                     LHPSVTRRVVNYPGAVSLGAERTVYGLARLEVRADQATRSLRLWDPEAQETLDLVPMNF
+                     MTPIGVPLLYRLLEALSPSNRYLWKPLDDIRDAGGPTVYGETAPRLVVGDVVADRRSWN
+                     VAAAEIPMLQDLSRDVPEALVAFDAWRLTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQ
+                     WADYAHLRRASVHKPMYVDFRNPFLVRSFAKSALSRGDVVASIRECLPSVDDYGPDTGW
+                     TAAEEFFVELCTDN"
+                     /protein_id="BGC0001472_11"
+     CDS             13612..14571
+                     /translation="MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGL
+                     QDHFFFLRYWQGGPHLRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLT
+                     LQDELARLEKETSEEGRPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLG
+                     GQPRAWVDERRAPIGEAARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWP
+                     KLFGGVSAQMTNLCAAVWRDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPY
+                     PGCLSNYVHTTNNRLGLVPAAEGLVAYLVRRGLEAMDG"
+                     /protein_id="BGC0001472_12"
+     CDS             14692..15894
+                     /translation="MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWL
+                     LTRHEHIRQLLADPHVSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVI
+                     PEFTVKRVLQLRPRVEEIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDR
+                     AFFQDRTNKLVSVDADPQERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGT
+                     FDHGELVGMANVLLVGGHETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRY
+                     FSIADQVTSRVATADLEIGGVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHL
+                     AFGHGIHQCIGQNLAKLELEVVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVS
+                     W"
+                     /protein_id="BGC0001472_13"
+     CDS             16220..16564
+                     /translation="MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFS
+                     DSARQLWGAVYLWDSPEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAG
+                     LGIALEGGTQ"
+                     /protein_id="BGC0001472_14"
+     CDS             17019..17729
+                     /translation="MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPV
+                     RLLAASVANQVFKTEKKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVR
+                     TSFALGASGIVLVDSDLGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVV
+                     FEADGDLGVADLDGMDERLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSA
+                     GIALHARARRNLSR"
+                     /protein_id="BGC0001472_15"
+     CDS             17815..19485
+                     /translation="ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAM
+                     HTGRHRLGLPSLWQDRRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHT
+                     LDPDPDIQHSTEAVRRRDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFV
+                     TVAYRGYYNRHWPKNLPQGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNR
+                     KGWVRSTHHRYPGTRTVLTAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGP
+                     LAPALGSATTRDGRLLLFGLRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSP
+                     GRDEVRRTGVPVAVAAPDGQIHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLH
+                     TAVDEGGRVHVFGAGHHAVHHWTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSL
+                     YYRAAAGSGLTTARAGTAVPGARFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLH
+                     VRTDGPAALDGASLRLGPDGRPSVAGLGPDAAPWMWRPR"
+                     /protein_id="BGC0001472_16"
+ORIGIN
+        1 gatcgatcga tcgatcgatc gatcgatcga tc
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BGC0001472.fna.prodigal.faa.ip.tsv	Fri Jul 26 14:31:32 2024 +0000
@@ -0,0 +1,81 @@
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	ProSitePatterns	PS00086	Cytochrome P450 cysteine heme-iron ligand signature.	342	351	-	T	13-08-2021	IPR017972	Cytochrome P450, conserved site
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00385	P450 superfamily signature	340	349	3.1E-7	T	13-08-2021	IPR001128	Cytochrome P450
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00385	P450 superfamily signature	238	255	3.1E-7	T	13-08-2021	IPR001128	Cytochrome P450
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00385	P450 superfamily signature	273	284	3.1E-7	T	13-08-2021	IPR001128	Cytochrome P450
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00385	P450 superfamily signature	349	360	3.1E-7	T	13-08-2021	IPR001128	Cytochrome P450
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	Pfam	PF00067	Cytochrome P450	272	368	4.0E-18	T	13-08-2021	IPR001128	Cytochrome P450
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	273	284	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	319	334	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	291	318	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	340	349	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	349	360	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	138	154	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	192	214	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	155	170	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	PRINTS	PR00359	B-class P450 signature	91	102	8.3E-58	T	13-08-2021	IPR002397	Cytochrome P450, B-class
+BGC0001472_13	874c0f534839f521f055a275c391567a	400	Gene3D	G3DSA:1.10.630.10	Cytochrome P450	2	400	7.0E-113	T	13-08-2021	IPR036396	Cytochrome P450 superfamily
+BGC0001472_11	67b7792659aca4f0747f903233e4f593	885	Pfam	PF04738	Lantibiotic dehydratase, N terminus	141	791	2.4E-20	T	13-08-2021	IPR006827	Lantibiotic dehydratase, N-terminal
+BGC0001472_6	76d1387ac73417cb91ccfb11c2c5229e	542	Gene3D	G3DSA:3.40.50.720	-	132	304	8.2E-16	T	13-08-2021	-	-
+BGC0001472_6	76d1387ac73417cb91ccfb11c2c5229e	542	Gene3D	G3DSA:3.40.109.10	NADH Oxidase	348	542	3.1E-35	T	13-08-2021	IPR000415	Nitroreductase-like
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Gene3D	G3DSA:3.40.50.300	-	3	304	1.4E-121	T	13-08-2021	IPR027417	P-loop containing nucleoside triphosphate hydrolase
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Pfam	PF00009	Elongation factor Tu GTP binding domain	10	294	1.2E-65	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	TIGRFAM	TIGR00231	small_GTP: small GTP-binding protein domain	11	184	1.5E-33	T	13-08-2021	IPR005225	Small GTP-binding protein domain
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Pfam	PF03144	Elongation factor Tu domain 2	337	404	9.3E-16	T	13-08-2021	IPR004161	Translation elongation factor EFTu-like, domain 2
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Pfam	PF00679	Elongation factor G C-terminus	615	701	2.7E-29	T	13-08-2021	IPR000640	Elongation factor EFG, domain V-like
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Pfam	PF14492	Elongation Factor G, domain III	417	491	2.5E-33	T	13-08-2021	IPR041095	Elongation Factor G, domain II
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Gene3D	G3DSA:3.30.70.870	Elongation Factor G (Translational Gtpase), domain 3	421	497	1.0E-34	T	13-08-2021	-	-
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	TIGRFAM	TIGR00484	EF-G: translation elongation factor G	5	707	0.0	T	13-08-2021	IPR004540	Translation elongation factor EFG/EF2
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Gene3D	G3DSA:3.30.230.10	-	498	703	4.6E-92	T	13-08-2021	IPR014721	Ribosomal protein S5 domain 2-type fold, subgroup
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	PRINTS	PR00315	GTP-binding elongation factor signature	13	26	4.2E-16	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	PRINTS	PR00315	GTP-binding elongation factor signature	59	67	4.2E-16	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	PRINTS	PR00315	GTP-binding elongation factor signature	83	93	4.2E-16	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	PRINTS	PR00315	GTP-binding elongation factor signature	99	110	4.2E-16	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	PRINTS	PR00315	GTP-binding elongation factor signature	135	144	4.2E-16	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Pfam	PF03764	Elongation factor G, domain IV	492	613	2.5E-47	T	13-08-2021	IPR005517	Translation elongation factor EFG/EF2, domain IV
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	ProSitePatterns	PS00301	Translational (tr)-type guanine nucleotide-binding (G) domain signature.	52	67	-	T	13-08-2021	IPR031157	Tr-type G domain, conserved site
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Gene3D	G3DSA:2.40.30.10	Translation factors	305	420	6.0E-44	T	13-08-2021	-	-
+BGC0001472_3	4b28e769738231bbe9f69d4979528f4d	709	Gene3D	G3DSA:3.30.70.240	-	619	689	4.6E-92	T	13-08-2021	-	-
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	Gene3D	G3DSA:3.40.50.300	-	1	205	3.7E-74	T	13-08-2021	IPR027417	P-loop containing nucleoside triphosphate hydrolase
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	TIGRFAM	TIGR00485	EF-Tu: translation elongation factor Tu	1	396	0.0	T	13-08-2021	IPR004541	Translation elongation factor EFTu/EF1A, bacterial/organelle
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	Pfam	PF03144	Elongation factor Tu domain 2	227	296	3.0E-17	T	13-08-2021	IPR004161	Translation elongation factor EFTu-like, domain 2
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	Pfam	PF03143	Elongation factor Tu C-terminal domain	301	395	1.4E-38	T	13-08-2021	IPR004160	Translation elongation factor EFTu/EF1A, C-terminal
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	Gene3D	G3DSA:2.40.30.10	Translation factors	208	337	2.9E-57	T	13-08-2021	-	-
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	TIGRFAM	TIGR00231	small_GTP: small GTP-binding protein domain	13	147	1.9E-13	T	13-08-2021	IPR005225	Small GTP-binding protein domain
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	PRINTS	PR00315	GTP-binding elongation factor signature	14	27	2.3E-24	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	PRINTS	PR00315	GTP-binding elongation factor signature	60	68	2.3E-24	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	PRINTS	PR00315	GTP-binding elongation factor signature	80	90	2.3E-24	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	PRINTS	PR00315	GTP-binding elongation factor signature	96	107	2.3E-24	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	PRINTS	PR00315	GTP-binding elongation factor signature	133	142	2.3E-24	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	Pfam	PF00009	Elongation factor Tu GTP binding domain	10	203	5.6E-57	T	13-08-2021	IPR000795	Translational (tr)-type GTP-binding domain
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	Gene3D	G3DSA:2.40.30.10	Translation factors	341	395	1.0E-25	T	13-08-2021	-	-
+BGC0001472_4	d768d0b7047f823230c36d45b2e27c7f	397	ProSitePatterns	PS00301	Translational (tr)-type guanine nucleotide-binding (G) domain signature.	53	68	-	T	13-08-2021	IPR031157	Tr-type G domain, conserved site
+BGC0001472_12	80b32fd90b93d2d340ddcffd78658c6b	319	Pfam	PF14028	Lantibiotic biosynthesis dehydratase C-term	16	314	2.0E-49	T	13-08-2021	IPR023809	Thiopeptide-type bacteriocin biosynthesis domain
+BGC0001472_16	c1b339f48f233f90c5ac174024b991af	556	Gene3D	G3DSA:3.40.50.10320	-	14	220	1.0E-8	T	13-08-2021	IPR024078	Putative deacetylase LmbE-like domain superfamily
+BGC0001472_16	c1b339f48f233f90c5ac174024b991af	556	Pfam	PF02585	GlcNAc-PI de-N-acetylase	52	145	9.7E-10	T	13-08-2021	IPR003737	N-acetylglucosaminyl phosphatidylinositol deacetylase-related
+BGC0001472_16	c1b339f48f233f90c5ac174024b991af	556	Gene3D	G3DSA:2.120.10.70	-	318	498	6.7E-7	T	13-08-2021	-	-
+BGC0001472_2	fc82bb58d52c83068b7ca785129b2384	156	TIGRFAM	TIGR01029	rpsG_bact: ribosomal protein uS7	3	156	4.4E-64	T	13-08-2021	IPR005717	Ribosomal protein S7, bacterial/organellar-type
+BGC0001472_2	fc82bb58d52c83068b7ca785129b2384	156	Pfam	PF00177	Ribosomal protein S7p/S5e	1	149	4.0E-59	T	13-08-2021	IPR023798	Ribosomal protein S7 domain
+BGC0001472_2	fc82bb58d52c83068b7ca785129b2384	156	Gene3D	G3DSA:1.10.455.10	Ribosomal protein S7 domain	1	155	7.0E-60	T	13-08-2021	IPR036823	Ribosomal protein S7 domain superfamily
+BGC0001472_2	fc82bb58d52c83068b7ca785129b2384	156	ProSitePatterns	PS00052	Ribosomal protein S7 signature.	20	46	-	T	13-08-2021	IPR020606	Ribosomal protein S7, conserved site
+BGC0001472_15	206c74fd5c80ef02123ab090a4b6cfa4	236	Pfam	PF04705	Thiostrepton-resistance methylase, N terminus	1	82	5.8E-30	T	13-08-2021	IPR006795	Thiostrepton-resistance methylase, N-terminal
+BGC0001472_15	206c74fd5c80ef02123ab090a4b6cfa4	236	Gene3D	G3DSA:3.40.1280.10	-	75	235	1.3E-37	T	13-08-2021	IPR029026	tRNA (guanine-N1-)-methyltransferase, N-terminal
+BGC0001472_15	206c74fd5c80ef02123ab090a4b6cfa4	236	Gene3D	G3DSA:3.30.1330.30	-	1	73	2.3E-26	T	13-08-2021	IPR029064	50S ribosomal protein L30e-like
+BGC0001472_15	206c74fd5c80ef02123ab090a4b6cfa4	236	Pfam	PF00588	SpoU rRNA Methylase family	88	227	1.8E-26	T	13-08-2021	IPR001537	tRNA/rRNA methyltransferase, SpoU type
+BGC0001472_8	2149eda482fc77a076bb0eb91c55bd5d	448	Gene3D	G3DSA:3.30.40.250	-	104	186	3.5E-36	T	13-08-2021	-	-
+BGC0001472_8	2149eda482fc77a076bb0eb91c55bd5d	448	TIGRFAM	TIGR03604	TOMM_cyclo_SagD: thiazole/oxazole-forming peptide maturase, SagD family component	75	448	1.4E-100	T	13-08-2021	IPR027624	Thiazole/oxazole-forming peptide maturase, SagD family component
+BGC0001472_8	2149eda482fc77a076bb0eb91c55bd5d	448	Pfam	PF02624	YcaO cyclodehydratase, ATP-ad Mg2+-binding	75	406	8.3E-62	T	13-08-2021	IPR003776	YcaO-like domain
+BGC0001472_8	2149eda482fc77a076bb0eb91c55bd5d	448	Gene3D	G3DSA:3.30.1330.230	-	82	405	3.5E-36	T	13-08-2021	-	-
+BGC0001472_8	2149eda482fc77a076bb0eb91c55bd5d	448	Gene3D	G3DSA:3.30.160.660	-	223	357	3.5E-36	T	13-08-2021	-	-
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	TIGRFAM	TIGR00981	rpsL_bact: ribosomal protein uS12	1	123	5.4E-69	T	13-08-2021	IPR005679	Ribosomal protein S12, bacterial-type
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	ProSitePatterns	PS00055	Ribosomal protein S12 signature.	43	50	-	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	Pfam	PF00164	Ribosomal protein S12/S23	12	123	8.3E-44	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	PRINTS	PR01034	Ribosomal protein S12 signature	27	42	8.4E-59	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	PRINTS	PR01034	Ribosomal protein S12 signature	42	57	8.4E-59	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	PRINTS	PR01034	Ribosomal protein S12 signature	58	77	8.4E-59	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	PRINTS	PR01034	Ribosomal protein S12 signature	77	94	8.4E-59	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	PRINTS	PR01034	Ribosomal protein S12 signature	94	110	8.4E-59	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	PRINTS	PR01034	Ribosomal protein S12 signature	110	122	8.4E-59	T	13-08-2021	IPR006032	Ribosomal protein S12/S23
+BGC0001472_1	f4269c94863705a842e7252b96e5f27d	123	Gene3D	G3DSA:2.40.50.140	-	1	123	1.6E-66	T	13-08-2021	-	-
+BGC0001472_7	80ec0c524f263f553a78952ff4408537	203	Gene3D	G3DSA:3.40.50.720	-	16	185	5.5E-22	T	13-08-2021	-	-
+BGC0001472_14	b47c649341e9af373f88df5f17e9dc46	114	Gene3D	G3DSA:3.30.70.100	-	1	92	1.6E-30	T	13-08-2021	-	-
+BGC0001472_5	8eb61811b90411be4123c98a64e16860	233	Gene3D	G3DSA:3.40.109.10	NADH Oxidase	12	230	5.1E-18	T	13-08-2021	IPR000415	Nitroreductase-like
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Sanntis_output_data.gff3	Fri Jul 26 14:31:32 2024 +0000
@@ -0,0 +1,2 @@
+##gff-version 3
+BGC0001472	SanntiSv0.9.3.5	CLUSTER	312	19485	.	.	.	ID=BGC0001472_sanntis_1;nearest_MiBIG=BGC0001472;nearest_MiBIG_class=RiPP;nearest_MiBIG_diceDistance=0.037;score=0.900;partial=11