Mercurial > repos > ecology > sanntis_marine
changeset 0:12870a79d56b draft
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics commit 3b5d66e71ee273061f579c8715fc085ea9d0b99e
author | ecology |
---|---|
date | Fri, 26 Jul 2024 14:31:32 +0000 |
parents | |
children | 9d689f8c9ce4 |
files | sanntis.xml test-data/BGC0001472.fna.prodigal.faa.gb test-data/BGC0001472.fna.prodigal.faa.ip.tsv test-data/Sanntis_output_data.gff3 |
diffstat | 4 files changed, 291 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sanntis.xml Fri Jul 26 14:31:32 2024 +0000
@@ -0,0 +1,53 @@
+<tool id="sanntis_marine" name="Sanntis biosynthetic gene clusters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
+    <description>in genomic and metagenomic data</description>
+    <macros>
+        <token name="@TOOL_VERSION@">0.9.3.5</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <edam_topics>
+        <edam_topic>topic_3387</edam_topic>
+    </edam_topics>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">sanntis</requirement>
+    </requirements>
+    <!-- Runs sanntis on the GenBank file plus its InterProScan TSV; the result is written to a fixed file name in the working directory so the outputs section can collect it via from_work_dir. -->
+    <command detect_errors="exit_code"><![CDATA[
+        sanntis --ip-file '$input_interpro' --outfile 'output_sanntis.gff' '$input_genbank'
+    ]]></command>
+    <inputs>
+        <param name="input_interpro" type="data" format="tabular" label="Input the TSV file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/>
+        <param name="input_genbank" type="data" format="genbank" label="Input a Genbank .gb file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/>
+    </inputs>
+    <outputs>
+        <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_interpro" value="BGC0001472.fna.prodigal.faa.ip.tsv"/>
+            <param name="input_genbank" value="BGC0001472.fna.prodigal.faa.gb"/>
+            <output name="output_sanntis" value="Sanntis_output_data.gff3"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+SMBGC Annotation using Neural Networks Trained on Interpro Signatures
+Tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
+
+.....
+
+**Input**
+
+- A tsv file coming from InterProScan tool from which you can retrieve the right data.
+- The right Genbank file with the right structure that fits the protein fasta file used to run InterProScan tool
+
+**Output**
+
+- A Gff3 file
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1101/2023.05.23.540769</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BGC0001472.fna.prodigal.faa.gb Fri Jul 26 14:31:32 2024 +0000 @@ -0,0 +1,155 @@ +LOCUS BGC0001472 32 bp DNA UNK 01-JAN-1980 +DEFINITION BGC0001472. +ACCESSION BGC0001472 +VERSION BGC0001472 +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + CDS 312..683 + /translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP + NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT + QGVKNRKQARSRYGAKKEK" + /protein_id="BGC0001472_1" + CDS 686..1156 + /translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG + AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV + VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW" + /protein_id="BGC0001472_2" + CDS 1195..3324 + /translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH + DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL + DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV + PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL + EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV + QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT + FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT + GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE + TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG + TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV + GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV + VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV + PRNVAEEIIAKAKGE" + /protein_id="BGC0001472_3" + CDS 3472..4665 + /translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE + ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA + ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY + EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT + ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL + LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD + VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK" + 
/protein_id="BGC0001472_4" + CDS 4869..5570 + /translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS + AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG + FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA + DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL + LATTYARPPES" + /protein_id="BGC0001472_5" + CDS 5567..7195 + /translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG + ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA + MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE + VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP + YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI + DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY + YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG + VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP + GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD + ALARLLGTSPADEPVAAFASLGGTA" + /protein_id="BGC0001472_6" + CDS 7210..7821 + /translation="MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPH + ADVVVPVHAGGDPGLRDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRK + RAAQHAGTARPYDMDAALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFN + LVSGAVSSAVTVSVNRCPRCGGRFSQARADSAMPVPELLR" + /protein_id="BGC0001472_7" + CDS 7845..9191 + /translation="MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQ + LAYLGVPSRALPNLRTWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDE + MVVAAENDLTEEFVSPSRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLV + PAISVYLHMPYQSKSEEFIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRL + PELVVDPARLDAGVRELHRVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVA + ATCDVHPEQALGKIYRELASLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDV + FGFLLDGERPAYGLEGMPGLPAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAV + KVLVPEAMPVSFVHGERYLGTPRLYDAPRAMGHTSHAEDAVNPVQQPFA" + /protein_id="BGC0001472_8" + CDS 9238..10437 + /translation="MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFAT + IPQPVGSKYNDTFAPLIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSD + EDSTRADLHRTAERYGCAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLV + 
SFLNRAAETPQSPPPALGAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHF + EAIYLRTKDPQSFDAACARFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESS + HLAENFSDGSVVNAGHTLEDLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQT + SLLYSCLYTLGFSLAERYVFCYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE + " + /protein_id="BGC0001472_9" + CDS 10511..10654 + /translation="MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSS + SS" + /protein_id="BGC0001472_10" + CDS 10977..13634 + /translation="MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGK + NAVGLADRLGELVPTLGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVE + TWCALGVRAEQCERAGREELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLV + AGDEASALKPSKARLRESSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGG + TQESVTTLNRLLVNWGPPGLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGR + MATERVLRVRREGLFDALLAAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGIL + FFRPEIDDHDPDYSMKLDRVLAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLD + SAYAAIGGIAELCKVSPPPEEVLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLA + SMMDNGQVKRLGLYSFATRVLGDRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERY + TRQRAEALRTIRQRLVPGDGTVHLDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDR + DRTLVVNGLLTGYGVYFSRFGSFVEGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFN + LHPSVTRRVVNYPGAVSLGAERTVYGLARLEVRADQATRSLRLWDPEAQETLDLVPMNF + MTPIGVPLLYRLLEALSPSNRYLWKPLDDIRDAGGPTVYGETAPRLVVGDVVADRRSWN + VAAAEIPMLQDLSRDVPEALVAFDAWRLTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQ + WADYAHLRRASVHKPMYVDFRNPFLVRSFAKSALSRGDVVASIRECLPSVDDYGPDTGW + TAAEEFFVELCTDN" + /protein_id="BGC0001472_11" + CDS 13612..14571 + /translation="MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGL + QDHFFFLRYWQGGPHLRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLT + LQDELARLEKETSEEGRPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLG + GQPRAWVDERRAPIGEAARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWP + KLFGGVSAQMTNLCAAVWRDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPY + PGCLSNYVHTTNNRLGLVPAAEGLVAYLVRRGLEAMDG" + /protein_id="BGC0001472_12" + CDS 14692..15894 + /translation="MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWL + LTRHEHIRQLLADPHVSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVI + PEFTVKRVLQLRPRVEEIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDR 
+ AFFQDRTNKLVSVDADPQERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGT + FDHGELVGMANVLLVGGHETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRY + FSIADQVTSRVATADLEIGGVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHL + AFGHGIHQCIGQNLAKLELEVVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVS + W" + /protein_id="BGC0001472_13" + CDS 16220..16564 + /translation="MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFS + DSARQLWGAVYLWDSPEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAG + LGIALEGGTQ" + /protein_id="BGC0001472_14" + CDS 17019..17729 + /translation="MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPV + RLLAASVANQVFKTEKKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVR + TSFALGASGIVLVDSDLGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVV + FEADGDLGVADLDGMDERLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSA + GIALHARARRNLSR" + /protein_id="BGC0001472_15" + CDS 17815..19485 + /translation="ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAM + HTGRHRLGLPSLWQDRRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHT + LDPDPDIQHSTEAVRRRDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFV + TVAYRGYYNRHWPKNLPQGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNR + KGWVRSTHHRYPGTRTVLTAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGP + LAPALGSATTRDGRLLLFGLRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSP + GRDEVRRTGVPVAVAAPDGQIHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLH + TAVDEGGRVHVFGAGHHAVHHWTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSL + YYRAAAGSGLTTARAGTAVPGARFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLH + VRTDGPAALDGASLRLGPDGRPSVAGLGPDAAPWMWRPR" + /protein_id="BGC0001472_16" +ORIGIN + 1 gatcgatcga tcgatcgatc gatcgatcga tc +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BGC0001472.fna.prodigal.faa.ip.tsv Fri Jul 26 14:31:32 2024 +0000 @@ -0,0 +1,81 @@ +BGC0001472_13 874c0f534839f521f055a275c391567a 400 ProSitePatterns PS00086 Cytochrome P450 cysteine heme-iron ligand signature. 342 351 - T 13-08-2021 IPR017972 Cytochrome P450, conserved site +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 340 349 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 238 255 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 273 284 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00385 P450 superfamily signature 349 360 3.1E-7 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 Pfam PF00067 Cytochrome P450 272 368 4.0E-18 T 13-08-2021 IPR001128 Cytochrome P450 +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 273 284 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 319 334 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 291 318 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 340 349 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 349 360 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 138 154 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class 
+BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 192 214 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 155 170 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 PRINTS PR00359 B-class P450 signature 91 102 8.3E-58 T 13-08-2021 IPR002397 Cytochrome P450, B-class +BGC0001472_13 874c0f534839f521f055a275c391567a 400 Gene3D G3DSA:1.10.630.10 Cytochrome P450 2 400 7.0E-113 T 13-08-2021 IPR036396 Cytochrome P450 superfamily +BGC0001472_11 67b7792659aca4f0747f903233e4f593 885 Pfam PF04738 Lantibiotic dehydratase, N terminus 141 791 2.4E-20 T 13-08-2021 IPR006827 Lantibiotic dehydratase, N-terminal +BGC0001472_6 76d1387ac73417cb91ccfb11c2c5229e 542 Gene3D G3DSA:3.40.50.720 - 132 304 8.2E-16 T 13-08-2021 - - +BGC0001472_6 76d1387ac73417cb91ccfb11c2c5229e 542 Gene3D G3DSA:3.40.109.10 NADH Oxidase 348 542 3.1E-35 T 13-08-2021 IPR000415 Nitroreductase-like +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.40.50.300 - 3 304 1.4E-121 T 13-08-2021 IPR027417 P-loop containing nucleoside triphosphate hydrolase +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF00009 Elongation factor Tu GTP binding domain 10 294 1.2E-65 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 TIGRFAM TIGR00231 small_GTP: small GTP-binding protein domain 11 184 1.5E-33 T 13-08-2021 IPR005225 Small GTP-binding protein domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF03144 Elongation factor Tu domain 2 337 404 9.3E-16 T 13-08-2021 IPR004161 Translation elongation factor EFTu-like, domain 2 +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF00679 Elongation factor G C-terminus 615 701 2.7E-29 T 13-08-2021 IPR000640 Elongation factor EFG, domain V-like +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 
Pfam PF14492 Elongation Factor G, domain III 417 491 2.5E-33 T 13-08-2021 IPR041095 Elongation Factor G, domain II +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.70.870 Elongation Factor G (Translational Gtpase), domain 3 421 497 1.0E-34 T 13-08-2021 - - +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 TIGRFAM TIGR00484 EF-G: translation elongation factor G 5 707 0.0 T 13-08-2021 IPR004540 Translation elongation factor EFG/EF2 +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.230.10 - 498 703 4.6E-92 T 13-08-2021 IPR014721 Ribosomal protein S5 domain 2-type fold, subgroup +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 13 26 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 59 67 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 83 93 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 99 110 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 PRINTS PR00315 GTP-binding elongation factor signature 135 144 4.2E-16 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Pfam PF03764 Elongation factor G, domain IV 492 613 2.5E-47 T 13-08-2021 IPR005517 Translation elongation factor EFG/EF2, domain IV +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 ProSitePatterns PS00301 Translational (tr)-type guanine nucleotide-binding (G) domain signature. 
52 67 - T 13-08-2021 IPR031157 Tr-type G domain, conserved site +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:2.40.30.10 Translation factors 305 420 6.0E-44 T 13-08-2021 - - +BGC0001472_3 4b28e769738231bbe9f69d4979528f4d 709 Gene3D G3DSA:3.30.70.240 - 619 689 4.6E-92 T 13-08-2021 - - +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:3.40.50.300 - 1 205 3.7E-74 T 13-08-2021 IPR027417 P-loop containing nucleoside triphosphate hydrolase +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 TIGRFAM TIGR00485 EF-Tu: translation elongation factor Tu 1 396 0.0 T 13-08-2021 IPR004541 Translation elongation factor EFTu/EF1A, bacterial/organelle +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF03144 Elongation factor Tu domain 2 227 296 3.0E-17 T 13-08-2021 IPR004161 Translation elongation factor EFTu-like, domain 2 +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF03143 Elongation factor Tu C-terminal domain 301 395 1.4E-38 T 13-08-2021 IPR004160 Translation elongation factor EFTu/EF1A, C-terminal +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:2.40.30.10 Translation factors 208 337 2.9E-57 T 13-08-2021 - - +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 TIGRFAM TIGR00231 small_GTP: small GTP-binding protein domain 13 147 1.9E-13 T 13-08-2021 IPR005225 Small GTP-binding protein domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 14 27 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 60 68 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 80 90 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding 
elongation factor signature 96 107 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 PRINTS PR00315 GTP-binding elongation factor signature 133 142 2.3E-24 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Pfam PF00009 Elongation factor Tu GTP binding domain 10 203 5.6E-57 T 13-08-2021 IPR000795 Translational (tr)-type GTP-binding domain +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 Gene3D G3DSA:2.40.30.10 Translation factors 341 395 1.0E-25 T 13-08-2021 - - +BGC0001472_4 d768d0b7047f823230c36d45b2e27c7f 397 ProSitePatterns PS00301 Translational (tr)-type guanine nucleotide-binding (G) domain signature. 53 68 - T 13-08-2021 IPR031157 Tr-type G domain, conserved site +BGC0001472_12 80b32fd90b93d2d340ddcffd78658c6b 319 Pfam PF14028 Lantibiotic biosynthesis dehydratase C-term 16 314 2.0E-49 T 13-08-2021 IPR023809 Thiopeptide-type bacteriocin biosynthesis domain +BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Gene3D G3DSA:3.40.50.10320 - 14 220 1.0E-8 T 13-08-2021 IPR024078 Putative deacetylase LmbE-like domain superfamily +BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Pfam PF02585 GlcNAc-PI de-N-acetylase 52 145 9.7E-10 T 13-08-2021 IPR003737 N-acetylglucosaminyl phosphatidylinositol deacetylase-related +BGC0001472_16 c1b339f48f233f90c5ac174024b991af 556 Gene3D G3DSA:2.120.10.70 - 318 498 6.7E-7 T 13-08-2021 - - +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 TIGRFAM TIGR01029 rpsG_bact: ribosomal protein uS7 3 156 4.4E-64 T 13-08-2021 IPR005717 Ribosomal protein S7, bacterial/organellar-type +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 Pfam PF00177 Ribosomal protein S7p/S5e 1 149 4.0E-59 T 13-08-2021 IPR023798 Ribosomal protein S7 domain +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 Gene3D G3DSA:1.10.455.10 Ribosomal protein S7 domain 1 155 7.0E-60 T 13-08-2021 IPR036823 Ribosomal protein S7 domain 
superfamily +BGC0001472_2 fc82bb58d52c83068b7ca785129b2384 156 ProSitePatterns PS00052 Ribosomal protein S7 signature. 20 46 - T 13-08-2021 IPR020606 Ribosomal protein S7, conserved site +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Pfam PF04705 Thiostrepton-resistance methylase, N terminus 1 82 5.8E-30 T 13-08-2021 IPR006795 Thiostrepton-resistance methylase, N-terminal +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Gene3D G3DSA:3.40.1280.10 - 75 235 1.3E-37 T 13-08-2021 IPR029026 tRNA (guanine-N1-)-methyltransferase, N-terminal +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Gene3D G3DSA:3.30.1330.30 - 1 73 2.3E-26 T 13-08-2021 IPR029064 50S ribosomal protein L30e-like +BGC0001472_15 206c74fd5c80ef02123ab090a4b6cfa4 236 Pfam PF00588 SpoU rRNA Methylase family 88 227 1.8E-26 T 13-08-2021 IPR001537 tRNA/rRNA methyltransferase, SpoU type +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.40.250 - 104 186 3.5E-36 T 13-08-2021 - - +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 TIGRFAM TIGR03604 TOMM_cyclo_SagD: thiazole/oxazole-forming peptide maturase, SagD family component 75 448 1.4E-100 T 13-08-2021 IPR027624 Thiazole/oxazole-forming peptide maturase, SagD family component +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Pfam PF02624 YcaO cyclodehydratase, ATP-ad Mg2+-binding 75 406 8.3E-62 T 13-08-2021 IPR003776 YcaO-like domain +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.1330.230 - 82 405 3.5E-36 T 13-08-2021 - - +BGC0001472_8 2149eda482fc77a076bb0eb91c55bd5d 448 Gene3D G3DSA:3.30.160.660 - 223 357 3.5E-36 T 13-08-2021 - - +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 TIGRFAM TIGR00981 rpsL_bact: ribosomal protein uS12 1 123 5.4E-69 T 13-08-2021 IPR005679 Ribosomal protein S12, bacterial-type +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 ProSitePatterns PS00055 Ribosomal protein S12 signature. 
43 50 - T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 Pfam PF00164 Ribosomal protein S12/S23 12 123 8.3E-44 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 27 42 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 42 57 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 58 77 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 77 94 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 94 110 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 PRINTS PR01034 Ribosomal protein S12 signature 110 122 8.4E-59 T 13-08-2021 IPR006032 Ribosomal protein S12/S23 +BGC0001472_1 f4269c94863705a842e7252b96e5f27d 123 Gene3D G3DSA:2.40.50.140 - 1 123 1.6E-66 T 13-08-2021 - - +BGC0001472_7 80ec0c524f263f553a78952ff4408537 203 Gene3D G3DSA:3.40.50.720 - 16 185 5.5E-22 T 13-08-2021 - - +BGC0001472_14 b47c649341e9af373f88df5f17e9dc46 114 Gene3D G3DSA:3.30.70.100 - 1 92 1.6E-30 T 13-08-2021 - - +BGC0001472_5 8eb61811b90411be4123c98a64e16860 233 Gene3D G3DSA:3.40.109.10 NADH Oxidase 12 230 5.1E-18 T 13-08-2021 IPR000415 Nitroreductase-like
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Sanntis_output_data.gff3 Fri Jul 26 14:31:32 2024 +0000 @@ -0,0 +1,2 @@ +##gff-version 3 +BGC0001472 SanntiSv0.9.3.5 CLUSTER 312 19485 . . . ID=BGC0001472_sanntis_1;nearest_MiBIG=BGC0001472;nearest_MiBIG_class=RiPP;nearest_MiBIG_diceDistance=0.037;score=0.900;partial=11