Next changeset 1:96c0067106ec (2023-07-11) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dram commit df10ba86507266a6a6f83c9bbefb7191a41b46f5 |
added:
dram_merge_annotations.xml macros.xml test-data/annotate_custom.fasta test-data/annotate_custom.hmm test-data/annotated1.genbank test-data/annotated1.gff test-data/annotated1.tabular test-data/annotated1_genes_faa.fasta test-data/annotated1_genes_fna.fasta test-data/annotated1_rrnas.tabular test-data/annotated1_scaffold.fasta test-data/annotated1_trnas.tabular test-data/annotated2.genbank test-data/annotated2.gff test-data/annotated2.tabular test-data/annotated2_genes_faa.fasta test-data/annotated2_genes_fna.fasta test-data/annotated2_rrnas.tabular test-data/annotated2_scaffold.fasta test-data/annotated2_trnas.tabular test-data/distill_custom.tabular test-data/input_annotate1.fasta.gz test-data/input_distill_rrna1.tabular test-data/input_distill_trna1.tabular test-data/neighborhoods_genes_loc1.fna test-data/neighborhoods_output1.fasta test-data/neighborhoods_scaffolds_loc1.fasta test-data/strainer_input_fasta1.fasta.gz test-data/strainer_output1.fasta |
b |
diff -r 000000000000 -r 2675f8d7b2a5 dram_merge_annotations.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dram_merge_annotations.xml Sat Dec 10 21:14:28 2022 +0000 |
[ |
b'@@ -0,0 +1,195 @@\n+<tool id="dram_merge_annotations" name="DRAM merge multiple annotations" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+ <description>into a single set</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <expand macro="requirements"/>\n+ <command detect_errors="exit_code"><![CDATA[\n+## DRAM expects a specific file name for each file in the set.\n+#set annotations_file_name = \'annotations.tsv\'\n+#set genbank_file_name = \'genbank\'\n+#set trnas_file_name = \'trnas.tsv\'\n+#set rrnas_file_name = \'rrnas.tsv\'\n+#set scaffolds_file_name = \'scaffolds.fna\'\n+#set genes_gff_file_name = \'genes.gff\'\n+#set genes_fna_file_name = \'genes.fna\'\n+#set genes_faa_file_name = \'genes.faa\'\n+\n+## DRAM expects each annotation set to be in a different directory.\n+#set input_dirs = list()\n+#for $index, $item in enumerate($annotations_collection):\n+ #set dir_name = \'input_dir_\' + str($index)\n+ mkdir \'$dir_name\' &&\n+ ln -s \'$item\' \'$dir_name/$annotations_file_name\' &&\n+ ln -s \'$genbank_collection[$index]\' \'$dir_name/$genbank_file_name\' &&\n+ #if $trnas_collection:\n+ ln -s \'$trnas_collection[$index]\' \'$dir_name/$trnas_file_name\' &&\n+ #end if\n+ #if $rrnas_collection\n+ ln -s \'$rrnas_collection[$index]\' \'$dir_name/$rrnas_file_name\' &&\n+ #end if\n+ ln -s \'$scaffolds_collection[$index]\' \'$dir_name/$scaffolds_file_name\' &&\n+ ln -s \'$genes_gff_collection[$index]\' \'$dir_name/$genes_gff_file_name\' &&\n+ ln -s \'$genes_fna_collection[$index]\' \'$dir_name/$genes_fna_file_name\' &&\n+ ln -s \'$genes_faa_collection[$index]\' \'$dir_name/$genes_faa_file_name\' &&\n+ $input_dirs.append($dir_name)\n+#end for\n+\n+DRAM.py merge_annotations\n+--input_dirs \'input_dir*\'\n+--output_dir \'output_dir\'\n+&& test -f \'output_dir/genes.faa\' && mv \'output_dir/genes.faa\' \'$output_genes_faa\' || echo \'No genes.faa output produced\'\n+&& test -f \'output_dir/genes.fna\' && mv 
\'output_dir/genes.fna\' \'$output_genes_fna\' || echo \'No genes.fna output produced\'\n+&& test -f \'output_dir/genes.gff\' && mv \'output_dir/genes.gff\' \'$output_genes_gff\' || echo \'No genes.gff output produced\'\n+&& test -f \'output_dir/scaffolds.fna\' && mv \'output_dir/scaffolds.fna\' \'$output_scaffolds_fna\' || echo \'No scaffolds.fna output produced\'\n+#if $rrnas_collection:\n+ && test -f \'output_dir/rrnas.tsv\' && mv \'output_dir/rrnas.tsv\' \'$output_rrnas\' || echo \'No rrnas.tsv output produced\'\n+#end if\n+#if $trnas_collection:\n+ && test -f \'output_dir/trnas.tsv\' && mv \'output_dir/trnas.tsv\' \'$output_trnas\' || echo \'No trnas.tsv output produced\'\n+#end if\n+&& test -f \'output_dir/annotations.tsv\' && mv \'output_dir/annotations.tsv\' \'$output_annotations\' || echo \'No annotations.tsv output produced\'\n+ ]]></command>\n+ <inputs>\n+ <param name="annotations_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of annotation files"/>\n+ <param name="genbank_collection" type="data_collection" format="genbank" collection_type="list" label="Collection of genbank files"/>\n+ <param name="trnas_collection" type="data_collection" format="tabular" collection_type="list" optional="true" label="Collection of trna files"/>\n+ <param name="rrnas_collection" type="data_collection" format="tabular" collection_type="list" optional="true" label="Collection of rrna files"/>\n+ <param name="scaffolds_collection" type="data_collection" format="fasta" collection_type="list" label="Collection of scaffolds files"/>\n+ <param name="genes_gff_collection" type="data_collection" format="gff3" collection_type="list" label="Collection of genes.gff files"/>\n+ <param name="genes_fna_collection" type="data_collection" format="fasta" collection_type="list" label="Collection of genes.fna files"/>\n+ <param name="genes_faa_collection" type="data_collection" format="fasta" collection_type="list" label="Collection of genes.faa 
files"/>\n+ </inputs>\n+ <outputs>\n+ '..b' <element name="annotated2" value="annotated2_trnas.tabular"/>\n+ </collection>\n+ </param>\n+ <param name="rrnas_collection">\n+ <collection type="list">\n+ <element name="annotated1" value="annotated1_rrnas.tabular"/>\n+ <element name="annotated2" value="annotated2_rrnas.tabular"/>\n+ </collection>\n+ </param>\n+ <param name="scaffolds_collection">\n+ <collection type="list">\n+ <element name="annotated1" value="annotated1_scaffold.fasta"/>\n+ <element name="annotated2" value="annotated2_scaffold.fasta"/>\n+ </collection>\n+ </param>\n+ <param name="genes_gff_collection">\n+ <collection type="list">\n+ <element name="annotated1" value="annotated1.gff"/>\n+ <element name="annotated2" value="annotated2.gff"/>\n+ </collection>\n+ </param>\n+ <param name="genes_fna_collection">\n+ <collection type="list">\n+ <element name="annotated1" value="annotated1_genes_fna.fasta"/>\n+ <element name="annotated2" value="annotated2_genes_fna.fasta"/>\n+ </collection>\n+ </param>\n+ <param name="genes_faa_collection">\n+ <collection type="list">\n+ <element name="annotated1" value="annotated1_genes_faa.fasta"/>\n+ <element name="annotated2" value="annotated2_genes_faa.fasta"/>\n+ </collection>\n+ </param>\n+ <output name="output_annotations">\n+ <assert_contents>\n+ <has_n_lines n="530" delta="1"/>\n+ </assert_contents>\n+ </output>\n+ <output_collection name="output_genbank" type="list" count="2">\n+ <element name="annotation_0.gbk" ftype="genbank">\n+ <assert_contents>\n+ <has_text text="LOCUS"/>\n+ </assert_contents>\n+ </element>\n+ <element name="annotation_1.gbk" ftype="genbank">\n+ <assert_contents>\n+ <has_text text="LOCUS"/>\n+ </assert_contents>\n+ </element>\n+ </output_collection>\n+ <output name="output_trnas">\n+ <assert_contents>\n+ <has_n_lines n="8" delta="1"/>\n+ </assert_contents>\n+ </output>\n+ <output name="output_rrnas">\n+ <assert_contents>\n+ <has_n_lines n="3" delta="1"/>\n+ </assert_contents>\n+ </output>\n+ 
<output name="output_scaffolds_fna">\n+ <assert_contents>\n+ <has_n_lines n="4" delta="1"/>\n+ </assert_contents>\n+ </output>\n+ <output name="output_genes_gff">\n+ <assert_contents>\n+ <has_n_lines n="23" delta="1"/>\n+ </assert_contents>\n+ </output>\n+ <output name="output_genes_fna">\n+ <assert_contents>\n+ <has_n_lines n="26" delta="1"/>\n+ </assert_contents>\n+ </output>\n+ <output name="output_genes_faa">\n+ <assert_contents>\n+ <has_n_lines n="26" delta="1"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+ <help>\n+**What it does**\n+ \n+@WHATITDOESHEADER@\n+\n+This tool accepts collections of the components of a DRAM annotation (i.e., annotations, genbank files, GFF files, tRNAs,\n+rRNAs, etc) and, except for the genbank files which remain a collection, merges the set of files for each component into\n+a single file.\n+\n+@WHATITDOESFOOTER@\n+ </help>\n+ <expand macro="citations"/>\n+</tool>\n' |
b |
diff -r 000000000000 -r 2675f8d7b2a5 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,72 @@ +<macros> + <token name="@TOOL_VERSION@">1.3.5</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">20.09</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">dram</requirement> + </requirements> + </xml> + <xml name="stdio"> + <stdio> + <regex match="FileNotFoundError" source="stderr" level="fatal"/> + <regex match="returned non-zero exit status" source="stdout" level="fatal"/> + <regex match="returned non-zero exit status" source="stderr" level="fatal"/> + <regex match="Invalid file path or buffer object type" source="stderr" level="fatal"/> + <exit_code range="1:" level="fatal"/> + </stdio> + </xml> + <xml name="categories_param"> + <param argument="--categories" type="text" value="" label="Distillate categories" help="Optional, leave blank to ignore"> + <expand macro="sanitizer"/> + </param> + </xml> + <xml name="custom_distillate_param"> + <param argument="--custom_distillate" type="data" format="tabular" optional="true" label="File containing a custom distillate form" help="Optional, leave blank to ignore"/> + </xml> + <xml name="genes_param"> + <param argument="--genes" type="text" value="" label="Space-separated list of genes to keep" help="Optional, leave blank to ignore"> + <expand macro="sanitizer"/> + </param> + </xml> + <xml name="identifiers_param"> + <param argument="--identifiers" type="text" value="" label="Database identifiers" help="Optional, leave blank to ignore"> + <expand macro="sanitizer"/> + </param> + </xml> + <xml name="input_file_param"> + <param argument="--input_file" type="data" format="tabular" label="Annotations file" help="Produced by the DRAM annotate tool"/> + </xml> + <xml name="sanitizer"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </xml> + <token name="@WHATITDOESHEADER@">DRAM (Distilled and Refined 
Annotation of Metabolism) is a tool for annotating metagenomic assembled genomes and VirSorter identified viral contigs. DRAM annotates MAGs and viral contigs using UniRef90, PFAM, dbCAN, RefSeq viral, VOGDB and the MEROPS peptidase database.</token> + <token name="@CUSTOMDISTILLATEFILES@">**Using Custom Distillate files** + +The custom distillate sheet must be a tabular file with all the columns specified below. This sheet is an extension +of the genome_summary_form.tsv file that is installed with the dram databases and which is one of several key data files +that characterizes the distillate. The genome_summary_form.tsv file is available for viewing here +https://github.com/WrightonLabCSU/DRAM/blob/master/data/genome_summary_form.tsv. The custom distillate sheet must +contain the following columns. + + * gene_id: the KO ids of the genes in which you are interested + * gene_description: descriptions of the genes + * module: the name of your module that you are adding + * sheet: the name you would like on the excel sheet in which your results appear + * header: the header that will appear in the dram metabolism summary + * subheader: the sub-header that will appear in the metabolism summary</token> + <token name="@WHATITDOESFOOTER@">More information about DRAM can be found here https://github.com/shafferm/DRAM/wiki.</token> + <xml name="citations"> + <citations> + <citation type="doi">10.1186/s13104-016-1900-2</citation> + </citations> + </xml> +</macros> + |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotate_custom.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotate_custom.fasta Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,83 @@ +>sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-001R PE=4 SV=1 +MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS +EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD +AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL +EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD +SFRKIYTDLGWKFTPL +>sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-002L PE=4 SV=1 +MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR +IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL +AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC +KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML +DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK +VMFFVAGAVLVAILISTVRW +>sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-002R PE=4 SV=1 +MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL +QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT +FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD +LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET +YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY +STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS +GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI +QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC +>sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-003L PE=4 SV=1 +MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT +PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS +TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI +>sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-003R PE=3 SV=1 +MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD +RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI 
+FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ +PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD +AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR +TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA +LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR +KAKIQEMFDNMVSRMVTS +>sp|Q6GZX1|004R_FRG3G Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-004R PE=4 SV=1 +MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY +>sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-005L PE=3 SV=1 +MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL +CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC +KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH +QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY +>sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-005R PE=4 SV=1 +MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS +NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED +QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT +REFVDKDAQEFQDFLNSLDASLLS +>sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-006L PE=3 SV=1 +MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL +IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII +>fcresfdr 
+MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL +>BUSCOaEOG7B0HST +MAADQAQFQQLLVSLLSTDNEVRKQAEEAYNNLPVESKVTFLLGAIANGQLSEEVRQLAA +VLLRRLFSSEFLEFYKKLPAEAQAQLKEQILLAVQQEVSEQLRRKVCEVVAEVARNLIDE +DGNNQWPEFLQFLFQCANSPSPQLKESALRIFTSVPGIFGNQEAQYLDLIKQMLAKSLED +TEDAEVRLQAVRAVGAFILLHDKEKEIQKHFADLLPALLQVVAESIEKQDDDALLKVLID +LAEATPKFLRPQLETILELCLKVLSEEDVEDSWRHLALEVLVTLAETAPAMVRKRAEKYI +VALVPLVLKMMTDLEEDEDWSVADEITEDDNDSNNVVAESALDRLACGLGGKVVLPLVVE +AIPAMLSSSDWKKRHAALMAISAIGEGCHKQMEALLDQVLDGVLKYLQDPHPRVRYAACN +AIGQMSTDFAPIFEKKFHDKVIPGLLLLLDDEANPRVQAHAGAALVNFSEDCPKNILTRY +LDAIMAKLEAILTSKFKELVEKGTKLVLEQVVTTIASVADTAEEEFVAYYDRLMPCLKYI +IQNANSEELKLLRGKTIECVSLIGLAVGREKFIADASEVMDLLLKTHTEGAELPDDDPQT +SYLISAWARICKILGKQFEQYLPLVMGPVLRTASLKPEVALLDNEDLEDIEGDVDWQFVS +LGEQQNFGIRTAGLEDKASACEMLVCYARELKEGFAEYAEEVVRLMVPLLKFYFHDGVRT +AAAESLPYLLDCAKIKGPQYLEGMWAYICPELLKAIDTEPEKEVLSELLSSLAKCIETLG +AGCLSEEALKELLRILDKLLKEHFERAEKRLEKRKDEDYDEVVEEELAEEDDEDVYILSK +VADILHALFATYKEAFLPAFDQVVPHFVKLLEPERPLADRQWALCVFDDVIEFGGPACVK +>FBpp0306926 +MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG +ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY +VSKRYKDLPPPHPGFGADQPPA +>FBpp0078508 +MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD +LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA +DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK +KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC +AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI 
+NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR +RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG +ETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRSGSSSGSGSGSGSRASSRSK +SGSRSGSGSRSRTNSPAGSQKSGSRSRSVSRSRSRSKSGSRSRSRSRSKSGSRSRSGS +RSGSGSRSPSRSRSGSPSGSGSSSGSASDE |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotate_custom.hmm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotate_custom.hmm Sat Dec 10 21:14:28 2022 +0000 |
[ |
b'@@ -0,0 +1,104 @@\n+HMMER3/b [3.0 | March 2010]\n+NAME CBM10\n+LENG 28\n+ALPH amino\n+RF no\n+CS no\n+MAP yes\n+DATE Thu Apr 21 15:04:19 2011\n+NSEQ 84\n+EFFN 8.697876\n+CKSUM 1939305542\n+STATS LOCAL MSV -7.3395 0.71998\n+STATS LOCAL VITERBI -7.4498 0.71998\n+STATS LOCAL FORWARD -3.9737 0.71998\n+HMM A C D E F G H I K L M N P Q R S T V W Y \n+ m->m m->i m->d i->m i->i d->m d->d\n+ COMPO 3.06033 2.42663 2.85747 2.77283 4.58046 2.24214 4.56942 3.29197 3.49682 3.34028 4.33944 2.39813 3.09771 2.95262 3.60586 2.44839 2.84569 3.24117 2.72301 3.28669\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.59367 5.66823 0.81137 0.61958 0.77255 0.00000 *\n+ 1 2.74340 3.47729 3.44643 2.64352 4.17985 3.79353 4.02282 3.57300 2.87097 3.22136 2.03109 3.38316 4.18747 1.79120 3.30841 2.03216 2.98385 3.28775 5.51287 4.21451 1 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00925 5.08381 5.80616 0.61958 0.77255 0.72961 0.65797\n+ 2 3.21366 0.33490 5.23672 5.17809 5.69922 3.92522 5.89105 5.22956 5.14397 4.94899 5.73770 3.11988 4.78715 5.32569 5.20447 2.46009 3.79312 4.43668 7.07761 5.95722 2 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00690 5.37599 6.09834 0.61958 0.77255 0.73167 0.65606\n+ 3 3.46517 6.03524 2.03286 2.67508 5.36194 2.25828 4.36563 4.86858 2.77875 4.33291 5.09631 1.07839 4.52524 2.62719 3.73072 2.75509 3.71412 4.41890 6.46315 5.00841 3 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.04091 5.49877 3.32435 0.61958 0.77255 0.52775 0.89143\n+ 4 2.51597 5.24536 3.21395 3.79383 4.81190 
3.30725 4.89534 4.21717 3.89295 3.90352 4.76570 4.13904 4.73005 4.17210 4.26805 3.51068 1.58681 3.86875 0.87084 4.95297 4 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00570 5.56595 6.28829 0.61958 0.77255 0.68571 0.70065\n+ 5 1.88947 5.00491 4.00042 3.04471 4.13869 4.17114 3.84034 3.52228 3.40249 3.22374 4.10809 3.86994 4.55537 3.71667 3.77844 2.67165 2.19530 3.28758 3.58338 1.27239 5 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00552 5.59725 6.31960 0.61958 0.77255 0.60266 0.79264\n+ 6 3.27926 5.74099 2.79040 2.94678 5.08062 1.27592 4.24831 3.41153 3.01359 4.05572 4.81107 3.21498 2.15563 2.32440 3.50951 2.31938 3.51838 4.13470 6.20511 4.79969 6 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00538 5.62391 6.34625 0.61958 0.77255 0.69965 0.68669\n+ 7 2.98451 5.68149 2.03356 2.71108 5.01893 3.09684 4.15179 4.49988 2.89056 3.98355 4.72438 1.82442 4.35665 2.68802 3.38251 2.74393 1.71463 4.06807 3.52699 3.90983 '..b' 7.77288 27 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00434 5.83749 6.55983 0.61958 0.77255 0.48576 0.95510\n+ 21 2.85558 4.93320 5.01904 4.44670 3.96129 4.55274 4.87439 3.39985 4.30306 3.14653 4.07295 3.28033 4.94659 4.51322 4.45054 3.85882 2.48037 2.52459 0.81497 2.55247 28 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00434 5.83749 6.55983 0.61958 0.77255 0.48576 0.95510\n+ 22 4.41613 7.05734 3.41290 0.33211 6.45410 4.55267 5.18133 
6.03259 4.21333 5.45749 6.34879 4.02373 5.23058 2.45539 4.78815 2.76925 4.73557 5.53995 7.57078 6.01524 29 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00434 5.83749 6.55983 0.61958 0.77255 0.48576 0.95510\n+ 23 4.65182 7.42009 2.82915 3.46315 6.72740 4.60645 5.35687 6.43956 4.62773 5.83877 6.82459 0.20801 5.34677 3.75994 5.38642 4.44973 5.00947 5.90632 7.92820 6.25143 30 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00434 5.83749 6.55983 0.61958 0.77255 0.48576 0.95510\n+ 24 3.03871 6.18494 3.21297 2.61231 5.52195 2.08712 4.55336 5.02325 3.36798 4.49202 5.24999 1.02698 4.71928 2.57491 3.89135 1.97384 3.88199 4.57575 6.62318 5.17899 31 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00504 5.68941 6.41175 0.61958 0.77255 0.48576 0.95510\n+ 25 1.99175 5.71724 3.48758 2.45228 5.05473 4.00588 4.19152 4.53331 2.79074 4.01804 4.28019 3.47693 4.39887 1.29371 2.11828 2.43019 3.30733 4.10393 6.15338 3.96473 32 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00504 5.68941 6.41175 0.61958 0.77255 0.48576 0.95510\n+ 26 3.43388 5.57346 3.69941 2.95794 4.91426 4.17833 4.53372 3.14649 3.35913 3.94178 4.77104 3.49497 4.65004 3.34410 3.80156 0.56810 2.88463 3.99116 6.19270 4.86921 33 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.00504 5.68941 6.41175 0.61958 0.77255 0.48576 0.95510\n+ 27 4.26126 0.16067 6.38751 6.27473 5.43175 4.92822 6.57062 4.62334 5.97406 4.19855 
3.09118 5.80642 5.70066 6.20961 5.93464 4.54901 4.79228 4.45835 7.10686 5.93986 34 - -\n+ 2.68618 4.42225 2.77519 2.73123 3.46354 2.40513 3.72494 3.29354 2.67741 2.69355 4.24690 2.90347 2.73739 3.18146 2.89801 2.37887 2.77519 2.98518 4.58477 3.61503\n+ 0.05109 5.66823 3.07142 0.61958 0.77255 0.48576 0.95510\n+ 28 4.30379 3.20827 6.48590 6.05149 4.97840 6.03933 6.92759 0.73764 6.03790 2.64441 4.67131 6.21290 6.22644 6.33190 6.22989 5.50520 4.56636 1.06340 7.18287 5.92811 35 - -\n+ 2.68632 4.41952 2.77533 2.73137 3.46368 2.40527 3.72508 3.29292 2.67755 2.69369 4.24704 2.90360 2.73753 3.18160 2.89814 2.37901 2.77533 2.98380 4.58491 3.61517\n+ 0.20465 1.68705 * 0.08250 2.53598 0.00000 *\n+//\n' |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1.genbank --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1.genbank Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,24 @@ +LOCUS dataset_5327_scaffold_1510 11453 bp DNA linear ENV 05-AUG-2022 +FEATURES Location/Qualifiers + CDS 478..1935 + /conf=100.00 + /cscore=116.40 + /gc_cont=0.437 + /gene=dataset_5327_scaffold_1510_1 + /partial=00 + /codon_start=1 + /rbs_motif=AGxAGG/AGGxGG + /rbs_spacer=11-12bp + /rscore=4.78 + /score=128.32 + /inference=Prodigal_v2.6.3 + /sscore=11.92 + /start_type=ATG + /translation=MGQDRQNSQELLNELNYLRQRLAELEEMNRDYLGMIENSYDAMSIADCDGRLLLINPAFERIMGITKSETLSRTIQDLTNDGITDASAALKAFETGKQESVIINTRAGRQVLSTGVPFYDQTGKIVRVYCNIRDVTELNHLRQKFEQSQKLASRYLFELLEFKRGKTFKFVAHSNKIKQMLETVHRIAVVDSTVLILGESGVGKDLVARIIHEASSRNDSGSFLKINCAAIPAELLESELFGYEGGAFTGAKKDGKAGYFEIADKGTLFLDEIGELPQKLQVKLLAVIQDQKITRIGGVKEKDVDVRIIAATNRDLEEMVKQGNFREDLFYRLNVIPITIPPLRERKEDIPFLIVHYTELFNKKYNRAVKFSKEAIEMLCKYNWPGNVRELANLVERVIVIGQESILNPEHIPGKYHTAAQNMAETVSDFKSLSDAVEKYELKLVKNTLELCKTREEAASKLGISLSGLSRRIRRLKQLENEGFI* +ORIGIN + 1 attgcctctt gtggccgggc catatcgaaa ggtatgtgcc gtagcaatac ggaaatctct + 61 gggatgctga aaaaagctct cggctctaag ttgactatct tgggttaact tggatgcccc + 121 tcactttagt gtcactttag tgaggggagg aggtcacttt atccagtgag ggcataaaaa + 181 atacggtagt tctttgcctg tttaatctgc ttaatcattc ttgcactgct gaattagagg + 241 acatccggcg gttggatgac ctattggaca gttggcagct cggtcccgcg ccaggcgatc + 301 tatgggatac tataatgacc ggggttagcc agtgtcatgc gcaaagtacc tgctttggta |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1.gff Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,11 @@ +##gff-version 3 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 478 1935 128.32 + 0 ID=dataset_5327_scaffold_1510_1;conf=100.00;cscore=116.40;gc_cont=0.437;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=11-12bp;rscore=4.78;sscore=11.92;start_type=ATG;tscore=2.70;uscore=4.44 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 2271 3857 216.97 + 0 ID=dataset_5327_scaffold_1510_2;conf=99.99;cscore=204.79;gc_cont=0.522;partial=00;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;rscore=6.26;sscore=12.18;start_type=ATG;tscore=2.70;uscore=3.87 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 4217 5395 100.23 + 0 ID=dataset_5327_scaffold_1510_3;conf=100.00;cscore=107.40;gc_cont=0.520;partial=00;rbs_motif=None;rbs_spacer=None;rscore=-11.04;sscore=-7.17;start_type=ATG;tscore=2.70;uscore=1.83 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 5385 6587 141.67 + 0 ID=dataset_5327_scaffold_1510_4;conf=100.00;cscore=125.51;gc_cont=0.526;partial=00;rbs_motif=GGAGG;rbs_spacer=5-10bp;rscore=12.15;sscore=16.15;start_type=ATG;tscore=2.70;uscore=0.08 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 6684 8315 209.17 + 0 ID=dataset_5327_scaffold_1510_5;conf=99.99;cscore=189.80;Dbxref="ko:K00179";gc_cont=0.559;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;rscore=8.35;sscore=19.37;start_type=ATG;tscore=2.70;uscore=8.32 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 8453 9043 86.60 + 0 ID=dataset_5327_scaffold_1510_6;conf=100.00;cscore=72.66;Dbxref="ko:K00180";gc_cont=0.584;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;rscore=8.35;sscore=13.94;start_type=ATG;tscore=2.70;uscore=2.89 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 9217 9381 10.26 + 0 ID=dataset_5327_scaffold_1510_7;conf=91.36;cscore=6.45;gc_cont=0.491;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=11-12bp;rscore=3.10;sscore=3.81;start_type=GTG;tscore=-4.98;uscore=5.69 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 9671 10636 92.34 - 0 
ID=dataset_5327_scaffold_1510_8;conf=100.00;cscore=90.86;gc_cont=0.475;partial=00;rbs_motif=AGGA;rbs_spacer=5-10bp;rscore=0.52;sscore=1.48;start_type=TTG;tscore=-1.88;uscore=3.50 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 10802 11086 32.01 + 0 ID=dataset_5327_scaffold_1510_9;conf=99.94;cscore=15.94;gc_cont=0.474;partial=00;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;rscore=6.26;sscore=16.08;start_type=ATG;tscore=2.70;uscore=7.11 +dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 11386 11451 6.71 + 0 ID=dataset_5327_scaffold_1510_10;conf=82.39;cscore=-2.06;Dbxref="ko:K03666";gc_cont=0.318;partial=01;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;rscore=6.26;sscore=8.77;start_type=ATG;tscore=2.70;uscore=4.81 |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1.tabular Sat Dec 10 21:14:28 2022 +0000 |
[ |
b'@@ -0,0 +1,520 @@\n+\tfasta\tscaffold\tgene_position\tstart_position\tend_position\tstrandedness\trank\tko_id\tkegg_hit\tpeptidase_id\tpeptidase_family\tpeptidase_hit\tpeptidase_RBH\tpeptidase_identity\tpeptidase_bitScore\tpeptidase_eVal\tpfam_hits\tcazy_id\tcazy_hits\theme_regulatory_motif_count\n+dataset_4924_scaffold_1140_1\tdataset_4924\tscaffold_1140\t1\t233\t793\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_2\tdataset_4924\tscaffold_1140\t2\t910\t1617\t-1\tD\t\t\t\t\t\t\t\t\t\tVitamin B12 dependent methionine synthase, activation domain [PF02965.20]\t\t\t0\n+dataset_4924_scaffold_1140_3\tdataset_4924\tscaffold_1140\t3\t1614\t3491\t-1\tD\t\t\t\t\t\t\t\t\t\tC-terminal domain of RACo the ASKHA domain [PF14574.9]; RACo middle region [PF17651.4]; RACo linker region [PF17650.4]; 2Fe-2S iron-sulfur cluster binding domain [PF00111.30]\t\t\t0\n+dataset_4924_scaffold_1140_4\tdataset_4924\tscaffold_1140\t4\t3670\t4368\t-1\tD\t\t\t\t\t\t\t\t\t\tUTRA domain [PF07702.16]; Bacterial regulatory proteins, gntR family [PF00392.24]\t\t\t0\n+dataset_4924_scaffold_1140_5\tdataset_4924\tscaffold_1140\t5\t4361\t5023\t-1\tD\t\t\t\t\t\t\t\t\t\tB12 binding domain [PF02310.22]; B12 binding domain [PF02607.20]\t\t\t0\n+dataset_4924_scaffold_1140_6\tdataset_4924\tscaffold_1140\t6\t5287\t5979\t-1\tD\t\t\t\t\t\t\t\t\t\tProtein of unknown function (DUF1638) [PF07796.14]\t\t\t0\n+dataset_4924_scaffold_1140_7\tdataset_4924\tscaffold_1140\t7\t6682\t7914\t-1\tD\t\t\t\t\t\t\t\t\t\tMajor Facilitator Superfamily [PF07690.19]; Transmembrane secretion effector [PF05977.16]; Sugar (and other) transporter [PF00083.27]\t\t\t0\n+dataset_4924_scaffold_1140_8\tdataset_4924\tscaffold_1140\t8\t8299\t8454\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_9\tdataset_4924\tscaffold_1140\t9\t8469\t9626\t-1\tD\t\t\t\t\t\t\t\t\t\tUroporphyrinogen decarboxylase (URO-D) 
[PF01208.20]\t\t\t0\n+dataset_4924_scaffold_1140_10\tdataset_4924\tscaffold_1140\t10\t9919\t10062\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_11\tdataset_4924\tscaffold_1140\t11\t10076\t11230\t-1\tD\t\t\t\t\t\t\t\t\t\tUroporphyrinogen decarboxylase (URO-D) [PF01208.20]\t\t\t0\n+dataset_4924_scaffold_1140_12\tdataset_4924\tscaffold_1140\t12\t11232\t11870\t-1\tD\t\t\t\t\t\t\t\t\t\tB12 binding domain [PF02310.22]; B12 binding domain [PF02607.20]\t\t\t0\n+dataset_4924_scaffold_1140_13\tdataset_4924\tscaffold_1140\t13\t11978\t12793\t-1\tC\tK15023\t5-methyltetrahydrofolate corrinoid/iron sulfur protein methyltransferase [EC:2.1.1.258]\t\t\t\t\t\t\t\tPterin binding enzyme [PF00809.25]\t\t\t0\n+dataset_4924_scaffold_1140_14\tdataset_4924\tscaffold_1140\t14\t12970\t14265\t-1\tD\t\t\t\t\t\t\t\t\t\tAcetyl-CoA hydrolase/transferase C-terminal domain [PF13336.9]; Acetyl-CoA hydrolase/transferase N-terminal domain [PF02550.18]\t\t\t0\n+dataset_4924_scaffold_1140_15\tdataset_4924\tscaffold_1140\t15\t14665\t16461\t-1\tD\t\t\t\t\t\t\t\t\t\tSigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; Helix-turn-helix domain [PF18024.4]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28]\t\t\t0\n+dataset_4924_scaffold_1140_16\tdataset_4924\tscaffold_1140\t16\t16623\t16886\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_17\tdataset_4924\tscaffold_1140\t17\t16949\t17716\t-1\tC\tK07546\t(E)-benzylidenesuccinyl-CoA hydratase [EC:4.2.1.180]\tMER1073240\tS49C\tMER1073240 - subfamily S49C non-peptidase homologues (Acinetobacter bohemicus) [S49.UNC]#S49C#{peptidase unit: 84-212}~source N8Q6E4~\tFalse\t0.498\t111.0\t1.871e-25\tEnoyl-CoA hydratase/isomerase [PF00378.23]; Enoyl-CoA hydratase/isomerase [PF16113.8]\t\t\t0\n+dataset_4924_scaffold_1140_18\tdataset_4924\tscaffold_1140\t18\t17764\t18030\t-1\tD\t\t\t\t\t\t\t\t\t\tAcyl-CoA dehydrogenase, C-terminal domain 
[PF00441.27]\t\t\t0\n+dataset_4924_scaffold_1172_1\tdataset_4924\tscaffold_1172\t1\t3\t233\t1\tD\t\t\t\t\t\t\t\t\t\tACS/CODH beta subunit C-terminal [PF19436.2]\t\t\t0\n+dataset_4924_scaffold_1172_2\tdataset_4924\tscaffold_1172\t2\t366\t1763\t1\tC\tK00197\tacetyl-CoA decarbonylase/synthase, CODH/ACS complex subunit gamma [EC:2.1.1.245]\t\t\t\t\t\t\t\tCO dehydrogenase/acetyl-CoA synthase delta subunit [PF03599.19]; Putative Fe-S cluster [PF04060.16]\t\t\t0\n+dataset_4924_scaffold_1172_3\tdataset_4924\tscaffold_1172\t3\t1862\t2620\t1\tC\tK07321\tCO dehydrogenase maturation factor\t\t\t\t\t\t\t\tAAA domain [PF13614.9]\t\t\t0\n+datase'..b'rotein, Fis family [PF02954.22]\t\t\t0\n+dataset_4924_scaffold_988_4\tdataset_4924\tscaffold_988\t4\t4895\t5542\t1\tD\t\t\t\t\t\t\t\t\t\tBacterial regulatory proteins, tetR family [PF00440.26]\t\t\t0\n+dataset_4924_scaffold_988_5\tdataset_4924\tscaffold_988\t5\t6103\t7443\t1\tC\tK01845\tglutamate-1-semialdehyde 2,1-aminomutase [EC:5.4.3.8]\t\t\t\t\t\t\t\tAminotransferase class-III [PF00202.24]\t\t\t0\n+dataset_4924_scaffold_988_6\tdataset_4924\tscaffold_988\t6\t8230\t8502\t1\tD\t\t\t\t\t\t\t\t\t\tAntitoxin Phd_YefM, type II toxin-antitoxin system [PF02604.22]\t\t\t0\n+dataset_4924_scaffold_988_7\tdataset_4924\tscaffold_988\t7\t9340\t10764\t1\tD\t\t\t\t\t\t\t\t\t\tAmino acid permease [PF13520.9]; Amino acid permease [PF00324.24]\t\t\t0\n+dataset_4924_scaffold_988_8\tdataset_4924\tscaffold_988\t8\t10793\t11359\t1\tD\t\t\t\t\t\t\t\t\t\tProtein of unknown function (DUF3156) [PF11354.11]\t\t\t0\n+dataset_4924_scaffold_988_9\tdataset_4924\tscaffold_988\t9\t11462\t12565\t1\tD\t\t\tMER0158050\tS09X\tMER0158050 - family S9 non-peptidase homologues (Dipodomys ordii) [S09.UNW]#S09X#{peptidase unit: 54-305}~source ENSDORP00000014784~\tFalse\t0.269\t87.0\t1.587e-16\tNeurobeachin beta propeller domain [PF20426.1]; Eukaryotic translation initiation factor eIF2A [PF08662.14]; Anaphase-promoting complex subunit 4 WD40 domain [PF12894.10]; Cytochrome 
D1 heme domain [PF02239.19]; WD domain, G-beta repeat [PF00400.35]; WD40-like domain [PF17005.8]; Lactonase, 7-bladed beta-propeller [PF10282.12]; WD40 region of Ge1, enhancer of mRNA-decapping protein [PF16529.8]\t\t\t0\n+dataset_4924_scaffold_988_10\tdataset_4924\tscaffold_988\t10\t12655\t14052\t1\tC\tK01915\tglutamine synthetase [EC:6.3.1.2]\t\t\t\t\t\t\t\tGlutamine synthetase, catalytic domain [PF00120.27]; Glutamine synthetase N-terminal domain [PF16952.8]; Glutamine synthetase, beta-Grasp domain [PF03951.22]\t\t\t0\n+dataset_4924_scaffold_988_11\tdataset_4924\tscaffold_988\t11\t14086\t14982\t1\tC\tK22081\tmethylamine---glutamate N-methyltransferase subunit A [EC:2.1.1.21]\tMER0459985\tC44\tMER0459985 - family C44 unassigned peptidases (Desulfotomaculum gibsoniae) [C44.UPW]#C44#{peptidase unit: 2-218}~source ZP_09099530~\tTrue\t0.968\t445.0\t6.218e-140\tGlutamine amidotransferase domain [PF13522.9]; Glutamine amidotransferase domain [PF13537.9]; Glutamine amidotransferases class-II [PF13230.9]\t\t\t0\n+dataset_4924_scaffold_988_12\tdataset_4924\tscaffold_988\t12\t14970\t15671\t1\tC\tK22082\tmethylamine---glutamate N-methyltransferase subunit B [EC:2.1.1.21]\t\t\t\t\t\t\t\tGXGXG motif [PF01493.22]\t\t\t0\n+dataset_4924_scaffold_988_13\tdataset_4924\tscaffold_988\t13\t15701\t16999\t1\tC\tK22083\tmethylamine---glutamate N-methyltransferase subunit C [EC:2.1.1.21]\t\t\t\t\t\t\t\tConserved region in glutamate synthase [PF01645.20]; FMN-dependent dehydrogenase [PF01070.21]; Nitronate monooxygenase [PF03060.18]\t\t\t1\n+dataset_4924_scaffold_988_14\tdataset_4924\tscaffold_988\t14\t17019\t17342\t1\tD\t\t\t\t\t\t\t\t\t\t2Fe-2S iron-sulfur cluster binding domain [PF13510.9]\t\t\t0\n+dataset_4924_scaffold_988_15\tdataset_4924\tscaffold_988\t15\t17339\t18931\t1\tD\t\t\t\t\t\t\t\t\t\tPyridine nucleotide-disulphide oxidoreductase [PF07992.17]; HI0933-like protein [PF03486.17]; NAD(P)-binding Rossmann-like domain [PF13450.9]; FAD dependent oxidoreductase [PF12831.10]; FAD 
binding domain [PF00890.27]; FAD binding domain [PF01494.22]; 4Fe-4S dicluster domain [PF13187.9]; 4Fe-4S dicluster domain [PF14697.9]; 4Fe-4S dicluster domain [PF13247.9]; Flavin-binding monooxygenase-like [PF00743.22]; FAD dependent oxidoreductase [PF01266.27]\t\t\t0\n+dataset_4924_scaffold_988_16\tdataset_4924\tscaffold_988\t16\t19221\t19571\t1\tD\t\t\t\t\t\t\t\t\t\tSarcosine oxidase A3 domain [PF17806.4]; BFD-like [2Fe-2S] binding domain [PF04324.18]\t\t\t0\n+dataset_4924_scaffold_988_17\tdataset_4924\tscaffold_988\t17\t19564\t20685\t1\tD\t\t\t\t\t\t\t\t\t\tFAD dependent oxidoreductase [PF01266.27]\t\t\t0\n+dataset_4924_scaffold_988_18\tdataset_4924\tscaffold_988\t18\t20716\t21117\t1\tC\tK09022\t2-iminobutanoate/2-iminopropanoate deaminase [EC:3.5.99.10]\t\t\t\t\t\t\t\tEndoribonuclease L-PSP [PF01042.24]\t\t\t0\n+dataset_4924_scaffold_988_19\tdataset_4924\tscaffold_988\t19\t21469\t21915\t-1\tD\t\t\t\t\t\t\t\t\t\tFamily of unknown function (DUF6144) [PF19641.2]\t\t\t0\n' |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_genes_faa.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1_genes_faa.fasta Sat Dec 10 21:14:28 2022 +0000 |
[ |
@@ -0,0 +1,6 @@ +>dataset_5327_scaffold_1510_1 rank: D; Sigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28] (db=pfam) +MGQDRQNSQELLNELNYLRQRLAELEEMNRDYLGMIENSYDAMSIADCDGRLLLINPAFERIMGITKSETLSRTIQDLTNDGITDASAALKAFETGKQESVIINTRAGRQVLSTGVPFYDQTGKIVRVYCNIRDVTELNHLRQKFEQSQKLASRYLFELLEFKRGKTFKFVAHSNKIKQMLETVHRIAVVDSTVLILGESGVGKDLVARIIHEASSRNDSGSFLKINCAAIPAELLESELFGYEGGAFTGAKKDGKAGYFEIADKGTLFLDEIGELPQKLQVKLLAVIQDQKITRIGGVKEKDVDVRIIAATNRDLEEMVKQGNFREDLFYRLNVIPITIPPLRERKEDIPFLIVHYTELFNKKYNRAVKFSKEAIEMLCKYNWPGNVRELANLVERVIVIGQESILNPEHIPGKYHTAAQNMAETVSDFKSLSDAVEKYELKLVKNTLELCKTREEAASKLGISLSGLSRRIRRLKQLENEGFI* +>dataset_5327_scaffold_1510_2 rank: D; AMP-binding enzyme [PF00501.31]; AMP-binding enzyme C-terminal domain [PF13193.9] (db=pfam) +MTVSKWMHVGVALKMNARNYPDKLGCQDKRKSYTFKEWNERSCRLASALKDMGVGYGERVAVIAYNRVEWMEIYAACAKGGQIVVPVMFRLTPHEFEYIVNHSGCKAFIVEEPFVKGVDSVRDILTTIPEGNYIYLGDGEAPEGYIHYESVMAQGDPSEPDISVDAADPWTIMYTSGTTGRPKGVVRTHENYLGQYLINNINMGVRPNDKPLLVMPMCHVNSIYYSFCYTYISAPVMVYNMVSFDPEDLLKTIVDYRVTFTSLVPTHYIMILALPDEIKQKYDTSCIRQLLISSAPARRDLKLAIMKYFKSAELWEAYGSTETSLVTYLRPEDQLTKLGSIGKEVFGCDEIKLLDENGEEVPVGEVGELYSRSPGMFKEYWKDPGKTSEVFRGKWCTAGDMGRRDEDGYYYLVDRKANMIISGGENVYPSEVENVVGAHPAVKDGAVIGVPDQKWGEIVLAFIILHEGYQAGDELAGEIINFCKDRVAGYKRPKSIRFISEEEMPRTGNGKIMHRVLREKYGKWSDSV* +>dataset_5327_scaffold_1510_3 rank: D; 2-hydroxyglutaryl-CoA dehydratase, D-component [PF06050.16] (db=pfam) +MTDRKTIKEICAQFKEIIAEPGLKIQRLQAEKPAPVIGFLPTDVPEELIHASGAYPFGLVAYDGLWVNRADAHLQTWACSLARCSFGMSLAGKFDYLNGLIIPHICDTTRMISDIWKQNRPYDFMENFILPRQVDRPSARSYLTGELGRLKARLEQFTGRSINGEKLNRSINLYNKHRALLRKLYQLHGHHPDLITNLDLFNAIKSSMLIPKGLHNTMVSELISAVEQQAREKQAEDNHGRVRVVVTGKVWEPPDIMEILDQSKVVCVADDLCTGYRYIANDAAEDGDPLETLAVRQINRPPSPCFVNREQDRLEYLTRKVNECGAKGVIFLHLKFCETENYDYPLLRDALSAANIPSVRVETEIGNMSQGQISTRIQAFAEMLGGGDIYGS* |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_genes_fna.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1_genes_fna.fasta Sat Dec 10 21:14:28 2022 +0000 |
[ |
@@ -0,0 +1,6 @@ +>dataset_5327_scaffold_1510_1 rank: D; Sigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28] (db=pfam) +ATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAA +>dataset_5327_scaffold_1510_2 rank: D; AMP-binding enzyme [PF00501.31]; AMP-binding enzyme C-terminal domain [PF13193.9] (db=pfam) 
+ATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAA +>dataset_5327_scaffold_1510_3 rank: D; 2-hydroxyglutaryl-CoA dehydratase, D-component [PF06050.16] (db=pfam) 
+ATGACTGACCGTAAAACCATTAAAGAAATCTGTGCACAATTTAAAGAAATTATTGCCGAGCCTGGTCTAAAGATTCAGCGGCTGCAAGCCGAAAAACCTGCTCCGGTAATCGGGTTTTTGCCCACTGACGTGCCTGAAGAATTAATCCATGCCTCGGGCGCTTACCCCTTCGGGCTGGTGGCTTATGATGGATTATGGGTCAACCGGGCCGACGCCCACTTGCAGACCTGGGCATGCTCTCTGGCACGATGTTCCTTCGGGATGTCTCTGGCCGGGAAGTTCGATTACCTGAACGGGCTGATCATCCCTCATATTTGCGACACCACCCGGATGATTTCAGACATTTGGAAACAAAACCGGCCTTACGATTTCATGGAGAACTTTATACTGCCCCGGCAGGTTGATCGTCCCAGTGCCAGGAGTTATCTCACCGGTGAATTGGGCCGGTTGAAGGCGCGTTTGGAGCAGTTTACGGGCAGGTCTATTAACGGTGAAAAATTAAACCGGAGCATCAACCTTTACAACAAGCATCGTGCTTTATTAAGAAAACTCTACCAGCTCCATGGCCACCACCCGGATCTGATCACCAACCTGGACTTATTCAATGCCATCAAATCTTCCATGCTGATCCCGAAAGGACTGCACAACACCATGGTCAGTGAACTGATCAGCGCTGTTGAGCAGCAGGCCCGGGAAAAACAGGCGGAAGATAACCACGGCCGGGTTAGGGTGGTAGTTACGGGTAAAGTCTGGGAACCCCCGGACATTATGGAGATCCTGGACCAGTCGAAAGTTGTGTGCGTGGCCGACGACCTGTGCACCGGCTACCGCTATATTGCCAACGATGCAGCAGAGGATGGCGATCCGCTGGAAACGCTGGCCGTTCGCCAAATAAATCGCCCACCGTCCCCCTGTTTTGTAAACCGCGAGCAGGACCGCCTAGAATACTTGACCCGTAAGGTAAATGAGTGCGGGGCAAAGGGTGTGATTTTCCTGCATTTGAAATTCTGCGAGACGGAAAATTATGACTACCCGCTGCTGCGTGATGCTCTGTCAGCCGCCAATATACCAAGCGTTCGGGTTGAAACGGAGATTGGAAACATGTCCCAGGGGCAGATTAGCACGCGCATTCAGGCTTTTGCGGAAATGTTGGGGGGAGGCGATATCTATGGCAGTTAG |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_rrnas.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1_rrnas.tabular Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +scaffold fasta begin end strand type e-value note +dataset_5327_scaffold_361 dataset_5327 61952 62058 - 5S rRNA 5.2e-16 |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_scaffold.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1_scaffold.fasta Sat Dec 10 21:14:28 2022 +0000 |
b |
b'@@ -0,0 +1,2 @@\n+>dataset_5327_scaffold_1510\n+TACAGGATGCTTTTTTAAACCAGGTTAGAAAGGATAATATACCTGTAACAATCTTTTTGGTTAATGGATTTCAATTAAAAGGTATGGTTAAGGGTTTTGATAATTTTACAGTAATTATGGAAAGTGACGGCAAACAAATGATGGTATATAAACACGCTATTTCTACAGTTAGTCCTATGAAACCGGTAAATACTTCTTTTTCGGAAGTTAAGCCTGGCTAAATAAAAGGTGTAGGTCATGTCTTTTAGTATATTCCACTTAGAAAACATTCCAAATTCAATCATTCCGCCTGGCTTTTCATTGAAAAGCAGGCGGTTTCTATTTTATAGCGCAACTTTGGTATTGATTGGGATGACTATAATAGTCCATAATTACACATATACATAATCATTTAATCAAGCGTGGTTATGTGACTGTAATTCCACTGTTCGTTTGGAATTGAATATACTCTTTGGGATTGAGAAGGGAAGTGAATAAATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAAATGTTTTTTCAAGTT
TAAGTATAAATTAAAAAACTAGGTCAAAGTTGAATGTCATGTTTAACCTTGTCAAAAGTCTTTATTTTAGTACAATAACAGTTTCATATTTGGTTTTGTATGTCAAATTTGACACCCTCAAAATCTGACAATTCTGAAGATCGGGCTGAAAAGATGGTCTAAAAGTTGGCACGTATTTTGCGTAAAATACCATTATCATACATGAATAGAACATTAATGGTTAAGGAGGGAACATCTATGACAGCTTGAGGGAATGCTTTGCCTGTTCTATGGTGGATGTTTTCAGAACCTTCGGACTTGTCAAAAGAGAGGGATATGAAATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAAAATATAAGTCAGAATATAATGGAATATAGGAAAGATAATGGATTATTTAATAAGTTAATCCAAAGCAGCAATGGTGCCAGGTGTTTAAAGGTT
GAA'..b'GCATGGTATATCCGCTCCCTGACAAGCTAATCCGGGAACTGGCCGCGCGGGTGAAGCAAGTGGTGGTCATTGAGGAACTCGACCCCTTCATTGAAGAACAGGTACGGCTGGTGGGCATCCCGGCCCGAGGGAAAGATATTTTCCCCAATATTGGAGAATTTAACCCCGGGCGCGTTCGCCGGTGTGCCCAAGAAGCCGGGCTGATTCCCGGGCCGGCCCAACCGTCCCCCGCCGTGTCGGTACCCCAGCTGCCCGGACGGCCGCCCATGCTCTGTCCGGGTTGCGGTCACCGGGGCCTGTTTTACGCTCTTCAGCGTTTAAATGCGACTGTATTTGGAGACATCGGCTGTTACACCCTGGGTGCGGCGCCCCCTCTGAACGCCATGCATACCACCGGCTGTATGGGCGCCAGCATCGGCGTCGTACACGGGGTTGACCGGATAGGCGTGAAGGACCGCACCGTTGCCGTAATCGGTGACTCCACCTTTTTCCACAGCGGTGTGGCTCCATTGATTAACCTTCATTATAACAACGGTACCAGTACGGTAATTGTTGTTGACAACCGGGTGACGGCCATGACCGGGCACCAGACAAATCCCGGCACGGGTAATACATTGCTGGGTATAGAGTCCCCCGTGGTTAAAATTGACGAGCTGGCCCGCGGCATCGGTTTTAAAAAGGTGGATGTGGTGAACCCGTATGACTTTAAGACCGTGGTGGCCACTATCAAGGACCACCTACAATCCGAAGAACCTTCATTAATTGTGGCCCAGTACCCCTGTGTTTTGTATAAACGGGAACGGAAGCCGGCCCTGGTGGTGGATGTGGAGAAATGCAACGAGTGCGGCAACTGTTTGTGAATCGGTTGTTCTCCCATCACCAAAGTAGAAGGGGGCGTCAGCATCAACGCCGCGCTGTGCATTGGCTGCGGTTTTTGTGCCACCATTTGCAATCAGGGCGCCATTAGCCTGCCAAATAAAGAAAGGGGGGCTGAATAATGAAAAAGAAATTGGATTTCCTGTTTGCCGGAGTGGGGGGCCAGGGTACCATACTGGCCAGCAATATCGTCTCCGAAGTGGGTATGCGCTGCGGGTATGACATCAAGAAATCGGAAGTACATGGCATGTCCCAGCGGGGGGGTGCTGTGGAAAGCCATGTGCGCTGGGCGGAAAAAGTCTATTCCCCGTTGATTGAAGAGGGCAGTGCCGATTTCCTGCTGGCCTTTGAAATGCTGGAGGCGGCCCGCTGGCCCCAATACATGGCACCCGGTTCGGTGGTTCTGGTCAACAACCACCGGGTGATGCCGCCGTCGGTAAACCTGGGCCAGGCGCAATATCCGGAGGTAAAGGAAATTGAATCCATCATGACCGCGGCCGGCAGTCAGGTAATCTGGGTCGAGGGCGCAGCCAAGGCGGAAGAATTGGGGAACCCCGCCCTGGCCGGGGTGGTGCTGCTGGGCGTGCTGTCCGCCCGCCTGGACGAGCCGGTGGAAACCTGGCTGCAGGCCGTGCAGGATCTAGTTCCTGCAAAATTCAAGGAACTTAACGTAAAAGCTTTTCTGGCCGGGCGAGAAATGGCGGCGAAATAGCCTATAATTGAATTAGCACTACAGCAAAAAGTATAAGCGGATTTACCGTGGTGCCAGGTGTTGAAAAGTTGAAAGTTGAAAATGAAGACGATCAGGAGTAATGGCTTAATAAGTAATTTTTGCTGTAGTATTAGTAATTGAATTGAATAAATAAGAAGGGGGTTACATCGCCCGTGGTGGAAGCAGTGTTCAATGCGATGTTTATGACCGGCTACCGGGGATACTTTCAGAGCACGTTCAATGGCAGTGCAGGCGGCCATAATTTGCTTTTCCTCAATGGCGTGGAGGACTGCTTGATATCCTGCCTGATGTATCTGCAGGACCATTTCATGAAATAGGACGCTGCGAGCGCACAGTCTGCCTCGATGGACAGCAGCTTTTTCGGGCCCACAATAAATATCAGG
CACGGGGCAATATAGAAAACGTTGAAATGCTGATTGCGCAGTATATTCTTATATTTGCTACTCATAAAGTCGGGATTCTGCTCGAGATGATGTAGAAGATTACGTTTGCTTTCGTCCGAAAGCCTTTTGATGACCTCCCTATTTTCAAGCTAGGCTATAATGGGTGTTGTCACAAGCTCTTTAATGATTAACTACCTCCGCACAAATGAACGTGCTTCTAACTTTAGTTTTCCAGTTCTTTCTTAAGTTCCAATACGGTCTCCACGGATAGTCCGGTTATTTCTGCCACGTCATTCACCGTGAAGCCCTTTTTCAAAGCAATCTTGGCAACCTCAACTTTGCCTTCAAGCTTGCCTGCAATTAGGGTCTGTTTTTTCATATCATCCAGGGTCCGCTCAATGTTGGTAATCATTTTTTCCACCTCCCGGGGTTCGGTTTCATCCAGTACACGTTCAATTTCCTTTTGCAGTGTACCTGGTAATTTGCGCTTGATCACGTTCCTCATCCAGACTGTTATTTGCCGGAATTGTTCCTGGTCTAGTTCCTTTAATATGTCGGCTAGTTTCCTGAGCCGGACCCCCAGTTCTTGCGGCTCAAGGGTCTGGTCCAGGTAGAATATGCTGGACACCACGTTAGCTGCACGGTGAAGGTCTTCCTCATTGTAACGAACGACGTCGAAAAGAATGTAGGAAAAGTCCAGTAAACGTCCAGGGAATTGTTCGTGCCCCGACTGGTACTCTCTGAAGCTTTGGTGGGCTGTCCAGCCTTTCTTGCCGTTGTAGAGAACTGCTGGTACGATAGCCGGCAGTTTGAAGTTCTTGCGTCTTCGTTCCTTTTCCGGGGCATTTTTGTAAATGTCCCGCCATATCTCAACCATGTATTGCAGCAAGCGAAAGGGCATAGTGTGGTCTACAGTCGACTGTAGTTCTAGAAGTACATAGAAGATTATTTCGGACCCGCCCAACCGCATTCTGTATACTATGTCGGCCTCTTTATCGCTAAAGTCCTGCAGTACATAGGATTTGTTGATCAACGTCAGGTCGCCCTCTTTTATTTCACGCACCCAACCCTCAGCCACAAATGTCTGCAGCAGTTCCAGGAAGGTCTTTTTGTTAGTCAGAAGCTGCCTGTATCCTTTGTCGTGAGGGTGGTGAGGCGGATTTTGTGCTGATTTCCCTCGGTTTGGCAATTTGTTCACTTCCTTACTGGTATTATATCATGGGGTTAGCTAGGGTGACAGAGGTTATGAAACTTATATTCCCTAACAAGCGATGCATCAGATATGGGCTTTGGCGATATGGCTACTGCTGGGCAAATGAGTCTATTAAAAGCTAAGAATGGTGGAGAAAAATAAATGGCCAAACCATCTTTTGACAAGTTCGCTGCCATGCTTAATCGAGCCGTAGACAGCATTCCACCACATTTCCTACGAGGCTTGACCGGCGGTTTTAACTTGCAGGAGGACGAAAAGTGTGAAGGCGAATACTATATTTTGGGTGAGTACATTGAGGATAGTATCCTGGGCTGCTTTATTGTGTTTTATTATGGTTCCTTCGTGGGACTGCTGAAAAACGAGCCCGACGATTGCTGGGAAGCAGAGATTGTTGATACGGTGCTGTATTTGTGCGCGCATCCCTGACCCCGGTTGTTGAACAGGCTATTACCTCTGCCGGCACTCATCATACCCTGGTTTCCATTTGTGGGTTATCTACATCACGAGTATTCTTTGGACGAAGCCGTTAATTTATTGAAAAGAAATACCAGAAGGTTTGCCAAGCGACAGTTGACTTGGTTTAGACGATATAGTAGTATCAAATGGATAGATATGGAGAAGTATGATATAATAAATAATGTTGCGGAAGAGATTAAATGTTTTATAGCTGAACTTATCCGGTGTCCATAGAAGGAAAAGTATAAAATACCAACAGAGGGAATTATATGACAAAGCCTCAGATAAATTTACAGGATGCTTTTTTAAACCAGGTTAGAAAGGATAATAT
ACCTGT\n' |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_trnas.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated1_trnas.tabular Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +fasta Name tRNA # Begin End Type Codon Score Note +dataset_5327 dataset_5327_scaffold_1814 1 1368 1292 His GTG 73.0 +dataset_5327 dataset_5327_scaffold_1814 2 1285 1214 Gln TTG 60.8 +dataset_5327 dataset_5327_scaffold_1814 3 1182 1110 Lys TTT 77.5 +dataset_5327 dataset_5327_scaffold_361 1 29420 29495 Met CAT 73.6 +dataset_5327 dataset_5327_scaffold_361 2 56462 56380 Leu CAA 63.7 +dataset_5327 dataset_5327_scaffold_361 3 12681 12586 Cys GCA 26.9 pseudo |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2.genbank --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2.genbank Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,26 @@ +LOCUS dataset_5328_scaffold_2027 6089 bp DNA linear ENV 05-AUG-2022 +FEATURES Location/Qualifiers + CDS 3..500 + /conf=100.00 + /cscore=78.24 + /gc_cont=0.735 + /gene=dataset_5328_scaffold_2027_1 + /partial=10 + /codon_start=1 + /rbs_motif=None + /rbs_spacer=None + /rscore=0.00 + /score=81.46 + /inference=Prodigal_v2.6.3 + /sscore=3.22 + /start_type=Edge + /translation=RLVEGARSHPEGRLRRLGVAVLYLLGEAGVWPRELFALRLEDFQPAARVLRVRGEKARSVPLSKEATEALKAYLEDRESVAGLAPLPSPYLLLRMTPKKGGLGRPLNRDTLKGLLERALEMGGLEHPRPTGALRWRAVRRYLQQGLSPQEVARRTGVASVLSLKD* + /tscore=3.22 + /uscore=0.00 +ORIGIN + 1 cccgcctggt ggagggagcc cgctcccacc ccgaagggcg gctgcggcgg ttgggggtgg + 61 cggtgctgta cctgctgggg gaggccgggg tgtggcccag ggaactcttc gccctgcgcc + 121 tggaggactt ccagcccgcc gcccgcgtct tgcgggtgcg gggggaaaaa gcccgcagcg + 181 tgccgctttc caaggaggcg accgaggccc tcaaagccta tctggaagac cgggagagcg + 241 tggccggcct ggccccgctg ccctcgccct acctccttct gcgcatgacc cccaagaaag + 301 gcgggctggg acgacccctc aaccgcgaca ccctgaaggg gctgctggag cgggcgctgg |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2.gff Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,13 @@ +##gff-version 3 +# Sequence Data: seqnum=1;seqlen=6089;seqhdr="scaffold_2027" +# Model Data: version=Prodigal.v2.6.3;run_type=Metagenomic;model="13|Catenulispora_acidiphila_DSM_44928|B|69.8|11|1";gc_cont=69.80;transl_table=11;uses_sd=1 +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 3 500 81.5 + 0 ID=dataset_5328_scaffold_2027_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.735;conf=100.00;score=81.46;cscore=78.24;sscore=3.22;rscore=0.00;uscore=0.00;tscore=3.22; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 550 1395 121.4 + 0 ID=dataset_5328_scaffold_2027_2;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.652;conf=100.00;score=122.16;cscore=123.21;sscore=-1.05;rscore=-4.48;uscore=-1.84;tscore=4.54; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 1362 1499 15.7 - 0 ID=dataset_5328_scaffold_2027_3;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.572;conf=97.34;score=15.66;cscore=6.69;sscore=8.97;rscore=6.72;uscore=-0.20;tscore=2.45; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 1583 2284 128.2 - 0 ID=dataset_5328_scaffold_2027_4;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.674;conf=100.00;score=129.04;cscore=129.30;sscore=-0.26;rscore=-4.48;uscore=-1.16;tscore=4.54; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 2324 2995 109.1 - 0 ID=dataset_5328_scaffold_2027_5;partial=00;start_type=TTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.711;conf=100.00;score=109.08;cscore=123.84;sscore=-14.76;rscore=-4.48;uscore=-0.76;tscore=-9.51;Dbxref="ko:K02338"; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 2959 3363 42.5 - 0 ID=dataset_5328_scaffold_2027_6;partial=00;start_type=ATG;rbs_motif=AGGAGG;rbs_spacer=3-4bp;gc_cont=0.699;conf=99.99;score=41.89;cscore=22.38;sscore=19.51;rscore=14.94;uscore=0.69;tscore=4.54; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 3435 3653 31.0 + 0 
ID=dataset_5328_scaffold_2027_7;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.644;conf=99.92;score=31.04;cscore=15.02;sscore=16.03;rscore=10.75;uscore=1.35;tscore=3.92; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 3716 4126 90.1 + 0 ID=dataset_5328_scaffold_2027_8;partial=00;start_type=ATG;rbs_motif=AGGAGG;rbs_spacer=3-4bp;gc_cont=0.710;conf=100.00;score=90.07;cscore=68.99;sscore=21.08;rscore=14.94;uscore=1.61;tscore=4.54; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 4128 4658 85.9 - 0 ID=dataset_5328_scaffold_2027_9;partial=00;start_type=GTG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.718;conf=100.00;score=87.09;cscore=80.36;sscore=6.73;rscore=10.10;uscore=-1.14;tscore=-3.38; +dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 4642 6087 162.5 - 0 ID=dataset_5328_scaffold_2027_10;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.717;conf=100.00;score=162.48;cscore=159.27;sscore=3.22;rscore=0.00;uscore=0.00;tscore=3.22; |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2.tabular Sat Dec 10 21:14:28 2022 +0000 |
[ |
@@ -0,0 +1,11 @@ + fasta scaffold gene_position start_position end_position strandedness rank ko_id kegg_hit pfam_hits heme_regulatory_motif_count +dataset_4710_scaffold_2027_1 dataset_4710 scaffold_2027 1 3 500 1 D Phage integrase family [PF00589.25] 0 +dataset_4710_scaffold_2027_2 dataset_4710 scaffold_2027 2 550 1395 1 D Protein of unknown function (DUF1385) [PF07136.14] 0 +dataset_4710_scaffold_2027_3 dataset_4710 scaffold_2027 3 1362 1499 -1 E 0 +dataset_4710_scaffold_2027_4 dataset_4710 scaffold_2027 4 1583 2284 -1 E 0 +dataset_4710_scaffold_2027_5 dataset_4710 scaffold_2027 5 2324 2995 -1 C K02338 DNA polymerase III subunit beta [EC:2.7.7.7] DNA polymerase III beta subunit, central domain [PF02767.19]; DNA polymerase III beta subunit, C-terminal domain [PF02768.18] 0 +dataset_4710_scaffold_2027_6 dataset_4710 scaffold_2027 6 2959 3363 -1 E 0 +dataset_4710_scaffold_2027_7 dataset_4710 scaffold_2027 7 3435 3653 1 E 0 +dataset_4710_scaffold_2027_8 dataset_4710 scaffold_2027 8 3716 4126 1 E 0 +dataset_4710_scaffold_2027_9 dataset_4710 scaffold_2027 9 4128 4658 -1 E 0 +dataset_4710_scaffold_2027_10 dataset_4710 scaffold_2027 10 4642 6087 -1 E 0 |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_genes_faa.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2_genes_faa.fasta Sat Dec 10 21:14:28 2022 +0000 |
[ |
@@ -0,0 +1,20 @@ +>dataset_5328_scaffold_2027_1 rank: D; Phage integrase family [PF00589.25] (db=pfam) +RLVEGARSHPEGRLRRLGVAVLYLLGEAGVWPRELFALRLEDFQPAARVLRVRGEKARSVPLSKEATEALKAYLEDRESVAGLAPLPSPYLLLRMTPKKGGLGRPLNRDTLKGLLERALEMGGLEHPRPTGALRWRAVRRYLQQGLSPQEVARRTGVASVLSLKD* +>dataset_5328_scaffold_2027_2 rank: D; Protein of unknown function (DUF1385) [PF07136.14] (db=pfam) +MDLNKLMGGMALPHGVVLMSTERVALGYYDKEGTLQLYTRELNNPSGGLKGLWTFFLEAARALWKTYPHQGEFRSVVAGVLAGVLTGIPIGLFLSRASLLPAWQMLLLSTSLVVLMFLALYRFYPPFRQGLQRMARYHGAEHKMIWALEKGEVSREGVRQQPLLHPACGSNLFALYLPFYLLSFPQSLLAPGFWWLQLLILPLLFPVFGWMRRHPEHPLARRLLALGYRFQRHTLAEPGEAELEAAWRALQGLEMETPSTSTVEGVRERCDYRRVKPRQAR* +>dataset_5328_scaffold_2027_3 rank: E +MIALFFLGSLTLTVAVAATLFFNFPAGLAVVAGIIYLAWRGFTRR* +>dataset_5328_scaffold_2027_4 rank: E +MDNPLEQIRSIPSRMLMALTWHGISLIQAPAALQRFDLPVRNLADLADPLTLLAALTPTTLYDFARSYTTATRWLETGRGEVVDLSYLHFVPGVFVQRAQEQAERGVLQGIYLLAMHPYMSLTAFTPVWPVLQITHPVLGPGFPLYELWPLQHWHLEDERRAVKGAFRTLLNVFAHTRLVPQGVLLDGSISGLCFEGYLHPAYALTEYRTAEWNLKEALTPRPEPGEWEPNDI* +>dataset_5328_scaffold_2027_5 rank: C; DNA polymerase III subunit beta [EC:2.7.7.7] (db=kegg) +MRDALEAVRYAVAREEYRKVLTGVRLEVGQTLRAVASDGYRLALQEFPLPAPLPAFQGVLPGAAVGDLLRLLAEEESVHLSLEKQMCYCQGERFRYATPLLSGEFPDYQRVMPTRYPAQAEVGPELSAALKRLEALSEDRVSKVQLTLQQEALHLRSENAYGLAEETVPAAVQGEPLHLTLNGRFLREALPDGGATLRFSGPATPLLITGQEGYQALLMPLRT* +>dataset_5328_scaffold_2027_6 rank: E +MVTVDKKPLHDAIGSLLRGLPQQERTVWFSAGENLKLQSFSHQRDLEVVVPLEAPAPAGFSLALDGSLLAEVVARMPEEIRLEREGEQLHLLGGTFQARLQTGWIDPPAPASARGRPPSPPGPCGTRWRRCATR* +>dataset_5328_scaffold_2027_7 rank: E +MKVKNPIFVRRILEEGAPLELLRHVAPDLPLEDLEEGRKLLRQAIWPEDVFDGERLFLRPRTPAQRALLQRL* +>dataset_5328_scaffold_2027_8 rank: E +MVPDFVLAYLPFVAALGLGGMVGRLLRVPLRAALWGLGLLGLGYVLLEPGQALALAQGWGRRGIEEALVWLGLPVQWAAYAFSPQWTWLVEQVSRHLVQQALAQADAGALERLNAYLQRADVGFVLGVLAGIDQRK* +>dataset_5328_scaffold_2027_9 rank: E 
+MLRVAESPQEALACDQALSLSQFQRRWPGLDPTALPGAVLLEREVAEITKARPRTVAFVALEALQEAEGFALRHLAGVAEMRYALGARPQDWQTDAAAVHLDATPDALWWSPEGVAAIEYDIRYNRDLVLRKAQDYRRIYVRQYWGATRLSRVQYLQRLLGPDPHTRVLLAPWLGG* +>dataset_5328_scaffold_2027_10 rank: E +ASHKRPSSVSYTSSTSSPDANSVPTPPPVASTPAPSGATPPSTATSAPPPPPLPTTTTPASQSPTPSAPASVPPPPSPPSSAGPSAPGERPYSGVVPPPPLPEYSATPAPAPGASPAAPAKPAFNPVKLQQAQQTASESGPVALKGEAASAQGKYAQLEREQAAKEPAYTAVLNPTSTKYTVFEGGKAEARNPVVLQGEQVQRSGYTPAIQSLSGPGGGYVALKTPQEQEKSAAAEPPTSAPTSSSDPLSPAPQASAALPAPTPAPTPASLSLSSPSVPTGQSPLSSPAPQAAASSPALAEYPFDVGEKQEGELVSGVVIPEGSSQAALMVRTKEGYVFFGVASLDRAGRLQMQFDRAYKGKTAYVVRAIALDERGVAGIPAQVSEQTPNLLTNLLRGAATGLVSYIDFYAKSSSTTILPGGGVASSNTPPPLGLTILSGSAKQIAAPPDSTSVVRVWSLDPGTKMQILIVPGEAAGAAGR* |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_genes_fna.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2_genes_fna.fasta Sat Dec 10 21:14:28 2022 +0000 |
[ |
@@ -0,0 +1,20 @@ +>dataset_5328_scaffold_2027_1 rank: D; Phage integrase family [PF00589.25] (db=pfam) +CGCCTGGTGGAGGGAGCCCGCTCCCACCCCGAAGGGCGGCTGCGGCGGTTGGGGGTGGCGGTGCTGTACCTGCTGGGGGAGGCCGGGGTGTGGCCCAGGGAACTCTTCGCCCTGCGCCTGGAGGACTTCCAGCCCGCCGCCCGCGTCTTGCGGGTGCGGGGGGAAAAAGCCCGCAGCGTGCCGCTTTCCAAGGAGGCGACCGAGGCCCTCAAAGCCTATCTGGAAGACCGGGAGAGCGTGGCCGGCCTGGCCCCGCTGCCCTCGCCCTACCTCCTTCTGCGCATGACCCCCAAGAAAGGCGGGCTGGGACGACCCCTCAACCGCGACACCCTGAAGGGGCTGCTGGAGCGGGCGCTGGAGATGGGCGGGCTGGAGCACCCGCGCCCCACCGGGGCCCTGCGCTGGCGGGCGGTGCGGCGGTACTTGCAGCAGGGCCTCTCCCCCCAGGAGGTGGCCCGGCGCACCGGGGTGGCCAGCGTGCTGAGCCTCAAGGACTGA +>dataset_5328_scaffold_2027_2 rank: D; Protein of unknown function (DUF1385) [PF07136.14] (db=pfam) +ATGGACCTGAATAAACTCATGGGTGGCATGGCCCTGCCCCACGGGGTGGTGTTGATGAGCACGGAGCGGGTGGCGCTGGGCTACTACGACAAAGAGGGAACCTTGCAGCTCTACACCCGCGAACTCAACAATCCCTCGGGCGGCTTGAAAGGCCTGTGGACGTTTTTCTTGGAGGCCGCACGGGCCCTGTGGAAGACCTACCCGCATCAGGGCGAATTTCGGAGCGTCGTGGCGGGGGTGCTGGCGGGGGTGCTGACGGGGATTCCCATCGGGCTGTTCCTGTCCCGGGCCTCGCTGCTTCCCGCTTGGCAGATGCTCCTCTTGAGCACCTCCCTGGTGGTGCTGATGTTTCTGGCGCTGTACCGCTTCTATCCGCCCTTCCGCCAGGGGTTACAGCGCATGGCCCGCTACCACGGGGCCGAGCACAAGATGATCTGGGCCCTCGAGAAAGGCGAGGTCAGCCGGGAAGGAGTCCGGCAGCAGCCGCTGTTGCATCCCGCTTGCGGCAGCAACCTGTTCGCCCTGTACCTGCCGTTCTACCTTCTGAGCTTCCCGCAGTCTCTGCTTGCTCCCGGGTTCTGGTGGTTGCAGCTGCTGATCCTGCCGCTGCTTTTTCCGGTCTTCGGCTGGATGCGCCGCCACCCCGAGCACCCGCTGGCGCGCAGGCTGCTGGCCCTGGGCTACCGCTTCCAGCGCCACACGCTGGCCGAGCCGGGGGAGGCCGAACTGGAGGCGGCCTGGCGGGCTTTGCAGGGGCTGGAAATGGAAACCCCCTCGACTTCAACAGTCGAGGGGGTCAGGGAGCGCTGTGACTACCGGCGGGTAAAGCCCCGCCAGGCCAGATAG +>dataset_5328_scaffold_2027_3 rank: E +ATGATAGCACTGTTCTTTTTGGGGAGCTTGACCCTGACTGTGGCCGTCGCGGCCACACTTTTTTTCAACTTCCCGGCGGGGTTGGCCGTCGTTGCCGGGATTATCTATCTGGCCTGGCGGGGCTTTACCCGCCGGTAG +>dataset_5328_scaffold_2027_4 rank: E 
+ATGGACAACCCCCTCGAACAGATCCGCTCCATCCCCTCCCGAATGCTCATGGCGCTGACCTGGCACGGTATCAGCCTGATCCAGGCTCCCGCTGCCCTCCAGCGCTTCGACCTGCCGGTGCGCAACCTGGCTGACCTGGCCGACCCGCTGACCCTGCTGGCGGCCCTCACCCCCACCACCCTGTACGACTTCGCCCGGAGCTACACCACCGCAACGCGCTGGCTGGAGACCGGGCGGGGGGAGGTGGTCGATCTCAGCTACCTGCACTTCGTCCCCGGAGTCTTCGTACAGCGGGCCCAGGAACAGGCCGAGCGGGGGGTCTTGCAGGGAATCTACCTGCTGGCCATGCACCCCTACATGTCCCTCACCGCCTTCACCCCGGTCTGGCCGGTGTTGCAGATCACCCACCCGGTGCTGGGCCCCGGCTTTCCGCTGTACGAGCTGTGGCCGTTGCAGCACTGGCACCTCGAGGACGAGCGGCGGGCGGTCAAAGGGGCCTTCCGCACCCTGCTGAACGTCTTCGCCCACACCCGGCTGGTCCCCCAGGGGGTGCTCCTGGACGGCAGCATCAGCGGGCTGTGCTTCGAGGGCTACCTGCACCCGGCCTACGCCCTCACCGAGTACCGCACCGCCGAGTGGAACCTCAAGGAAGCCCTCACTCCCCGCCCCGAGCCTGGCGAATGGGAGCCAAATGATATATAA +>dataset_5328_scaffold_2027_5 rank: C; DNA polymerase III subunit beta [EC:2.7.7.7] (db=kegg) +TTGCGGGACGCGCTGGAGGCGGTGCGCTACGCGGTAGCGCGGGAGGAGTACCGCAAGGTGCTGACCGGGGTGCGGCTGGAGGTGGGCCAGACCCTGCGGGCGGTGGCCTCCGACGGCTACCGCCTGGCCCTCCAGGAGTTCCCCCTGCCCGCCCCTCTCCCGGCCTTCCAGGGAGTCCTGCCCGGCGCGGCGGTAGGGGATCTGCTGCGCCTTCTGGCCGAGGAGGAATCAGTCCACCTCTCCCTGGAGAAGCAGATGTGCTACTGCCAGGGGGAGCGCTTCCGCTACGCCACCCCCCTGCTCTCGGGGGAGTTCCCCGACTACCAGCGGGTGATGCCCACCCGCTACCCCGCCCAGGCCGAGGTGGGGCCGGAACTCTCCGCTGCGCTGAAGCGGCTGGAAGCGCTGTCGGAAGACCGGGTTTCCAAGGTGCAGCTCACCCTCCAGCAGGAGGCCTTACACCTGCGCAGCGAAAACGCATACGGCCTCGCTGAGGAGACGGTCCCGGCGGCGGTGCAGGGGGAACCCCTGCACCTGACCCTGAACGGTCGCTTCCTCCGCGAGGCCCTGCCCGACGGAGGGGCCACCCTGCGCTTCAGCGGCCCCGCCACCCCGCTGCTGATCACCGGCCAGGAGGGCTACCAGGCCCTCCTCATGCCCCTGAGGACCTGA +>dataset_5328_scaffold_2027_6 rank: E +ATGGTGACGGTAGATAAAAAACCCCTACACGACGCGATTGGCAGCCTGTTGCGGGGCCTCCCACAACAGGAGCGCACGGTCTGGTTCTCCGCCGGGGAGAACCTCAAGCTCCAGTCCTTCTCGCACCAGCGGGATCTGGAGGTGGTGGTCCCCCTGGAGGCCCCGGCCCCGGCGGGCTTCAGCCTGGCCCTGGACGGCTCCCTGCTGGCCGAGGTGGTGGCCCGGATGCCGGAGGAGATCCGGCTGGAGCGGGAGGGGGAACAGCTTCATCTACTCGGCGGCACCTTCCAGGCCCGGCTGCAAACCGGCTGGATAGACCCCCCCGCCCCCGCTTCGGCCAGGGGCAGGCCGCCCTCCCCGCCCGGTCCTTGCGGGACGCGCTGGAGGCGGTGCGCTACGCGGTAG +>dataset_5328_scaffold_2027_7 rank: E 
+ATGAAAGTAAAAAATCCAATTTTTGTCCGCCGGATTCTCGAGGAGGGTGCGCCGCTGGAACTCCTCCGGCATGTCGCCCCGGACCTGCCCCTGGAGGATCTGGAAGAGGGACGCAAGCTCCTCCGCCAGGCCATCTGGCCGGAGGACGTGTTCGACGGGGAGCGGCTTTTCCTGCGCCCCCGCACCCCGGCGCAGCGGGCGCTCTTGCAGCGTCTGTAA +>dataset_5328_scaffold_2027_8 rank: E +ATGGTACCGGACTTTGTACTGGCCTATCTGCCCTTTGTGGCCGCGCTGGGGCTGGGAGGGATGGTGGGGCGGCTGTTGCGGGTACCGCTGCGGGCGGCGCTGTGGGGACTGGGCTTGCTGGGGCTGGGCTACGTCCTGCTCGAGCCGGGTCAGGCCCTGGCCCTGGCCCAGGGGTGGGGACGCCGGGGGATAGAGGAGGCCCTGGTGTGGCTGGGGCTGCCCGTGCAGTGGGCGGCCTACGCCTTCAGCCCGCAATGGACCTGGCTGGTGGAGCAGGTCAGCCGGCACCTGGTCCAGCAGGCCCTGGCTCAGGCCGACGCCGGAGCCCTGGAGCGCCTGAACGCCTATCTCCAGCGCGCCGACGTGGGCTTCGTCCTGGGAGTGCTGGCCGGGATCGACCAGCGCAAATAG +>dataset_5328_scaffold_2027_9 rank: E +GTGCTGCGGGTCGCTGAGTCCCCCCAGGAAGCCCTGGCCTGCGACCAGGCCCTGAGCCTGTCTCAGTTCCAGCGCCGCTGGCCGGGCCTCGACCCCACCGCTTTGCCCGGGGCGGTGCTGCTGGAGCGGGAGGTGGCCGAGATCACCAAAGCCCGGCCCCGAACGGTGGCCTTCGTGGCCCTGGAGGCCCTTCAGGAGGCCGAAGGCTTCGCCCTGCGCCACCTGGCCGGGGTGGCCGAGATGCGCTACGCCCTGGGAGCCCGCCCCCAAGACTGGCAGACCGACGCCGCTGCGGTCCACCTCGACGCGACCCCCGACGCTTTGTGGTGGAGCCCGGAAGGGGTGGCGGCCATCGAGTACGACATCCGCTACAACCGCGACCTGGTGCTGCGCAAGGCCCAGGACTACCGCCGCATCTACGTGCGGCAATACTGGGGAGCCACCCGGCTTTCGCGGGTGCAGTACCTTCAGCGCCTGCTGGGCCCCGACCCCCACACGCGGGTGCTGCTGGCCCCCTGGCTAGGAGGTTGA +>dataset_5328_scaffold_2027_10 rank: E 
+GCCAGCCATAAGCGTCCGTCGAGCGTGAGCTACACCTCCTCCACCTCGAGCCCGGACGCCAATTCCGTCCCTACCCCGCCCCCCGTCGCCTCCACCCCCGCCCCTTCGGGCGCCACGCCCCCCTCCACCGCCACCTCCGCCCCGCCTCCCCCGCCGCTGCCCACCACCACCACTCCGGCCTCGCAAAGCCCCACACCGTCCGCCCCCGCCAGCGTTCCCCCACCCCCTAGCCCCCCCAGCAGCGCAGGGCCATCGGCCCCCGGCGAGCGCCCGTACTCCGGGGTGGTGCCGCCGCCCCCGCTGCCCGAGTACAGCGCCACCCCGGCCCCCGCGCCCGGCGCTTCCCCTGCGGCCCCGGCCAAACCGGCCTTCAACCCGGTCAAGCTGCAACAAGCCCAGCAGACGGCCAGCGAAAGCGGCCCGGTGGCCCTCAAGGGCGAGGCCGCCTCGGCTCAGGGCAAGTACGCTCAGCTGGAGCGGGAACAAGCCGCCAAGGAACCGGCCTACACCGCCGTGCTCAACCCCACGAGCACCAAGTACACCGTCTTCGAGGGGGGCAAAGCCGAAGCCCGCAACCCGGTGGTTTTGCAGGGCGAACAGGTCCAGCGCAGCGGCTATACCCCGGCCATCCAGTCCCTCTCCGGGCCCGGCGGAGGGTATGTGGCCCTGAAAACTCCACAGGAGCAGGAAAAGTCTGCCGCTGCCGAGCCTCCAACCAGCGCCCCGACCTCCTCCAGCGACCCCCTCAGCCCCGCCCCCCAGGCCAGCGCCGCACTGCCCGCTCCCACACCCGCTCCCACACCCGCTTCCCTCTCCCTTTCCAGCCCCAGCGTTCCCACCGGCCAAAGCCCCCTGAGCAGCCCCGCCCCCCAGGCCGCTGCTTCCTCCCCGGCCCTGGCCGAGTACCCCTTCGACGTGGGGGAAAAGCAGGAAGGGGAGTTGGTCTCGGGGGTGGTGATCCCCGAGGGCAGCTCCCAGGCCGCCTTGATGGTGCGCACCAAGGAAGGCTACGTCTTCTTCGGGGTGGCCAGCCTGGACCGGGCCGGGCGCTTGCAAATGCAGTTCGACCGGGCCTACAAGGGCAAGACCGCCTACGTGGTGCGGGCCATCGCCCTGGACGAACGGGGCGTGGCGGGGATTCCCGCCCAGGTCAGCGAGCAGACCCCCAACCTGCTCACCAACCTGCTGCGGGGGGCCGCCACCGGCCTGGTCAGCTACATCGACTTCTACGCCAAGAGCAGTTCCACTACCATCCTGCCCGGGGGCGGGGTGGCCAGCAGCAACACCCCCCCGCCGCTGGGATTGACCATCCTCTCCGGCTCGGCCAAGCAGATCGCCGCCCCTCCCGACTCGACCAGCGTGGTGCGGGTGTGGAGCCTCGACCCGGGCACCAAGATGCAGATCCTCATCGTCCCCGGGGAGGCCGCGGGTGCTGCGGGTCGCTGA |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_rrnas.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2_rrnas.tabular Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +scaffold fasta begin end strand type e-value note +dataset_5328_scaffold_362 dataset_5328 61952 62058 - 5S rRNA 5.2e-16 |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_scaffold.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2_scaffold.fasta Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +>dataset_5328_scaffold_2027 +CCCGCCTGGTGGAGGGAGCCCGCTCCCACCCCGAAGGGCGGCTGCGGCGGTTGGGGGTGGCGGTGCTGTACCTGCTGGGGGAGGCCGGGGTGTGGCCCAGGGAACTCTTCGCCCTGCGCCTGGAGGACTTCCAGCCCGCCGCCCGCGTCTTGCGGGTGCGGGGGGAAAAAGCCCGCAGCGTGCCGCTTTCCAAGGAGGCGACCGAGGCCCTCAAAGCCTATCTGGAAGACCGGGAGAGCGTGGCCGGCCTGGCCCCGCTGCCCTCGCCCTACCTCCTTCTGCGCATGACCCCCAAGAAAGGCGGGCTGGGACGACCCCTCAACCGCGACACCCTGAAGGGGCTGCTGGAGCGGGCGCTGGAGATGGGCGGGCTGGAGCACCCGCGCCCCACCGGGGCCCTGCGCTGGCGGGCGGTGCGGCGGTACTTGCAGCAGGGCCTCTCCCCCCAGGAGGTGGCCCGGCGCACCGGGGTGGCCAGCGTGCTGAGCCTCAAGGACTGAACTTCTCATCTGGCCCGGCGGGCGGCTTTTTCTCCTGTACCATCAGGGCATGGACCTGAATAAACTCATGGGTGGCATGGCCCTGCCCCACGGGGTGGTGTTGATGAGCACGGAGCGGGTGGCGCTGGGCTACTACGACAAAGAGGGAACCTTGCAGCTCTACACCCGCGAACTCAACAATCCCTCGGGCGGCTTGAAAGGCCTGTGGACGTTTTTCTTGGAGGCCGCACGGGCCCTGTGGAAGACCTACCCGCATCAGGGCGAATTTCGGAGCGTCGTGGCGGGGGTGCTGGCGGGGGTGCTGACGGGGATTCCCATCGGGCTGTTCCTGTCCCGGGCCTCGCTGCTTCCCGCTTGGCAGATGCTCCTCTTGAGCACCTCCCTGGTGGTGCTGATGTTTCTGGCGCTGTACCGCTTCTATCCGCCCTTCCGCCAGGGGTTACAGCGCATGGCCCGCTACCACGGGGCCGAGCACAAGATGATCTGGGCCCTCGAGAAAGGCGAGGTCAGCCGGGAAGGAGTCCGGCAGCAGCCGCTGTTGCATCCCGCTTGCGGCAGCAACCTGTTCGCCCTGTACCTGCCGTTCTACCTTCTGAGCTTCCCGCAGTCTCTGCTTGCTCCCGGGTTCTGGTGGTTGCAGCTGCTGATCCTGCCGCTGCTTTTTCCGGTCTTCGGCTGGATGCGCCGCCACCCCGAGCACCCGCTGGCGCGCAGGCTGCTGGCCCTGGGCTACCGCTTCCAGCGCCACACGCTGGCCGAGCCGGGGGAGGCCGAACTGGAGGCGGCCTGGCGGGCTTTGCAGGGGCTGGAAATGGAAACCCCCTCGACTTCAACAGTCGAGGGGGTCAGGGAGCGCTGTGACTACCGGCGGGTAAAGCCCCGCCAGGCCAGATAGATAATCCCGGCAACGACGGCCAACCCCGCCGGGAAGTTGAAAAAAAGTGTGGCCGCGACGGCCACAGTCAGGGTCAAGCTCCCCAAAAAGAACAGTGCTATCATTTTTTCCCTCCTATCGAGGAGGTACCTGTGACTTTTCAAGCTCCCTGGGGGGCCTGAAAAGTTCAGGCCCTCCTGGTTAGCTATTATATATCATTTGGCTCCCATTCGCCAGGCTCGGGGCGGGGAGTGAGGGCTTCCTTGAGGTTCCACTCGGCGGTGCGGTACTCGGTGAGGGCGTAGGCCGGGTGCAGGTAGCCCTCGAAGCACAGCCCGCTGATGCTGCCGTCCAGGAGCACCCCCTGGGGGACCAGCCGGGTGTGGGCGAAGACGTTCAGCAGGGTGCGGAAGGCCCCTTTGACCGCCCGCCGCTCGTCCTCGAGGTGCCAGTGCTGCAACGGCCACAGCTCGTACAGCGGAAAGCCGGGGCCCAGCACCGGGTGGGTGATCTGCAACACCGGCCAGACCGGGGTGAAGGCGGTGAGGGACATGTAGGGGTGCATGGCCAGCAGGTAGAT
TCCCTGCAAGACCCCCCGCTCGGCCTGTTCCTGGGCCCGCTGTACGAAGACTCCGGGGACGAAGTGCAGGTAGCTGAGATCGACCACCTCCCCCCGCCCGGTCTCCAGCCAGCGCGTTGCGGTGGTGTAGCTCCGGGCGAAGTCGTACAGGGTGGTGGGGGTGAGGGCCGCCAGCAGGGTCAGCGGGTCGGCCAGGTCAGCCAGGTTGCGCACCGGCAGGTCGAAGCGCTGGAGGGCAGCGGGAGCCTGGATCAGGCTGATACCGTGCCAGGTCAGCGCCATGAGCATTCGGGAGGGGATGGAGCGGATCTGTTCGAGGGGGTTGTCCATAAGAGAAAAGGGGAAGGGGCGTTGCCCCTTCCCGTCGGGTCAGGTCCTCAGGGGCATGAGGAGGGCCTGGTAGCCCTCCTGGCCGGTGATCAGCAGCGGGGTGGCGGGGCCGCTGAAGCGCAGGGTGGCCCCTCCGTCGGGCAGGGCCTCGCGGAGGAAGCGACCGTTCAGGGTCAGGTGCAGGGGTTCCCCCTGCACCGCCGCCGGGACCGTCTCCTCAGCGAGGCCGTATGCGTTTTCGCTGCGCAGGTGTAAGGCCTCCTGCTGGAGGGTGAGCTGCACCTTGGAAACCCGGTCTTCCGACAGCGCTTCCAGCCGCTTCAGCGCAGCGGAGAGTTCCGGCCCCACCTCGGCCTGGGCGGGGTAGCGGGTGGGCATCACCCGCTGGTAGTCGGGGAACTCCCCCGAGAGCAGGGGGGTGGCGTAGCGGAAGCGCTCCCCCTGGCAGTAGCACATCTGCTTCTCCAGGGAGAGGTGGACTGATTCCTCCTCGGCCAGAAGGCGCAGCAGATCCCCTACCGCCGCGCCGGGCAGGACTCCCTGGAAGGCCGGGAGAGGGGCGGGCAGGGGGAACTCCTGGAGGGCCAGGCGGTAGCCGTCGGAGGCCACCGCCCGCAGGGTCTGGCCCACCTCCAGCCGCACCCCGGTCAGCACCTTGCGGTACTCCTCCCGCGCTACCGCGTAGCGCACCGCCTCCAGCGCGTCCCGCAAGGACCGGGCGGGGAGGGCGGCCTGCCCCTGGCCGAAGCGGGGGCGGGGGGGTCTATCCAGCCGGTTTGCAGCCGGGCCTGGAAGGTGCCGCCGAGTAGATGAAGCTGTTCCCCCTCCCGCTCCAGCCGGATCTCCTCCGGCATCCGGGCCACCACCTCGGCCAGCAGGGAGCCGTCCAGGGCCAGGCTGAAGCCCGCCGGGGCCGGGGCCTCCAGGGGGACCACCACCTCCAGATCCCGCTGGTGCGAGAAGGACTGGAGCTTGAGGTTCTCCCCGGCGGAGAACCAGACCGTGCGCTCCTGTTGTGGGAGGCCCCGCAACAGGCTGCCAATCGCGTCGTGTAGGGGTTTTTTATCTACCGTCACCATTTTACCTCCTATCGGTGAGCATTATACCAAAAGGAGCTTTTTGCTATACTGAAACGCGATAGGAGGTAAGGATGAAAGTAAAAAATCCAATTTTTGTCCGCCGGATTCTCGAGGAGGGTGCGCCGCTGGAACTCCTCCGGCATGTCGCCCCGGACCTGCCCCTGGAGGATCTGGAAGAGGGACGCAAGCTCCTCCGCCAGGCCATCTGGCCGGAGGACGTGTTCGACGGGGAGCGGCTTTTCCTGCGCCCCCGCACCCCGGCGCAGCGGGCGCTCTTGCAGCGTCTGTAAGTCAGGGAGGCACAGCCGGATTCGCTCCTAAAGGAGCGAATCTTTTATAGGAAGGAGGCCGGATGGTACCGGACTTTGTACTGGCCTATCTGCCCTTTGTGGCCGCGCTGGGGCTGGGAGGGATGGTGGGGCGGCTGTTGCGGGTACCGCTGCGGGCGGCGCTGTGGGGACTGGGCTTGCTGGGGCTGGGCTACGTCCTGCTCGAGCCGGGTCAGGCCCTGGCCCTGGCCCAGGGGTGGGGACGCCGGGGGATAGAGGAGGCCCTGGTGTGGCTGGGGCTGCCCGTGCAGTGGGCGGCCTA
CGCCTTCAGCCCGCAATGGACCTGGCTGGTGGAGCAGGTCAGCCGGCACCTGGTCCAGCAGGCCCTGGCTCAGGCCGACGCCGGAGCCCTGGAGCGCCTGAACGCCTATCTCCAGCGCGCCGACGTGGGCTTCGTCCTGGGAGTGCTGGCCGGGATCGACCAGCGCAAATAGCTCAACCTCCTAGCCAGGGGGCCAGCAGCACCCGCGTGTGGGGGTCGGGGCCCAGCAGGCGCTGAAGGTACTGCACCCGCGAAAGCCGGGTGGCTCCCCAGTATTGCCGCACGTAGATGCGGCGGTAGTCCTGGGCCTTGCGCAGCACCAGGTCGCGGTTGTAGCGGATGTCGTACTCGATGGCCGCCACCCCTTCCGGGCTCCACCACAAAGCGTCGGGGGTCGCGTCGAGGTGGACCGCAGCGGCGTCGGTCTGCCAGTCTTGGGGGCGGGCTCCCAGGGCGTAGCGCATCTCGGCCACCCCGGCCAGGTGGCGCAGGGCGAAGCCTTCGGCCTCCTGAAGGGCCTCCAGGGCCACGAAGGCCACCGTTCGGGGCCGGGCTTTGGTGATCTCGGCCACCTCCCGCTCCAGCAGCACCGCCCCGGGCAAAGCGGTGGGGTCGAGGCCCGGCCAGCGGCGCTGGAACTGAGACAGGCTCAGGGCCTGGTCGCAGGCCAGGGCTTCCTGGGGGGACTCAGCGACCCGCAGCACCCGCGGCCTCCCCGGGGACGATGAGGATCTGCATCTTGGTGCCCGGGTCGAGGCTCCACACCCGCACCACGCTGGTCGAGTCGGGAGGGGCGGCGATCTGCTTGGCCGAGCCGGAGAGGATGGTCAATCCCAGCGGCGGGGGGGTGTTGCTGCTGGCCACCCCGCCCCCGGGCAGGATGGTAGTGGAACTGCTCTTGGCGTAGAAGTCGATGTAGCTGACCAGGCCGGTGGCGGCCCCCCGCAGCAGGTTGGTGAGCAGGTTGGGGGTCTGCTCGCTGACCTGGGCGGGAATCCCCGCCACGCCCCGTTCGTCCAGGGCGATGGCCCGCACCACGTAGGCGGTCTTGCCCTTGTAGGCCCGGTCGAACTGCATTTGCAAGCGCCCGGCCCGGTCCAGGCTGGCCACCCCGAAGAAGACGTAGCCTTCCTTGGTGCGCACCATCAAGGCGGCCTGGGAGCTGCCCTCGGGGATCACCACCCCCGAGACCAACTCCCCTTCCTGCTTTTCCCCCACGTCGAAGGGGTACTCGGCCAGGGCCGGGGAGGAAGCAGCGGCCTGGGGGGCGGGGCTGCTCAGGGGGCTTTGGCCGGTGGGAACGCTGGGGCTGGAAAGGGAGAGGGAAGCGGGTGTGGGAGCGGGTGTGGGAGCGGGCAGTGCGGCGCTGGCCTGGGGGGCGGGGCTGAGGGGGTCGCTGGAGGAGGTCGGGGCGCTGGTTGGAGGCTCGGCAGCGGCAGACTTTTCCTGCTCCTGTGGAGTTTTCAGGGCCACATACCCTCCGCCGGGCCCGGAGAGGGACTGGATGGCCGGGGTATAGCCGCTGCGCTGGACCTGTTCGCCCTGCAAAACCACCGGGTTGCGGGCTTCGGCTTTGCCCCCCTCGAAGACGGTGTACTTGGTGCTCGTGGGGTTGAGCACGGCGGTGTAGGCCGGTTCCTTGGCGGCTTGTTCCCGCTCCAGCTGAGCGTACTTGCCCTGAGCCGAGGCGGCCTCGCCCTTGAGGGCCACCGGGCCGCTTTCGCTGGCCGTCTGCTGGGCTTGTTGCAGCTTGACCGGGTTGAAGGCCGGTTTGGCCGGGGCCGCAGGGGAAGCGCCGGGCGCGGGGGCCGGGGTGGCGCTGTACTCGGGCAGCGGGGGCGGCGGCACCACCCCGGAGTACGGGCGCTCGCCGGGGGCCGATGGCCCTGCGCTGCTGGGGGGGCTAGGGGGTGGGGGAACGCTGGCGGGGGCGGACGGTGTGGGGCTTTGCGAGGCCGGAGTGGTGGTGGTGGGCAGCGGCGGGGGAGGCGGGGCGGAGG
TGGCGGTGGAGGGGGGCGTGGCGCCCGAAGGGGCGGGGGTGGAGGCGACGGGGGGCGGGGTAGGGACGGAATTGGCGTCCGGGCTCGAGGTGGAGGAGGTGTAGCTCACGCTCGACGGACGCTTATGGCTGGCCA |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_trnas.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/annotated2_trnas.tabular Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +fasta Name tRNA # Begin End Type Codon Score Note +dataset_5328 dataset_5328_scaffold_1814 1 1368 1292 His GTG 73.0 |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/distill_custom.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/distill_custom.tabular Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,32 @@ +gene_id gene_description module sheet header subheader potential_amg +K02981 small subunit ribosomal protein S2e Ribosome, eukaryotes MISC Information systems TRUE +K02985 small subunit ribosomal protein S3e Ribosome, eukaryotes MISC Information systems TRUE +K02984 small subunit ribosomal protein S3Ae Ribosome, eukaryotes MISC Information systems TRUE +K02987 small subunit ribosomal protein S4e Ribosome, eukaryotes MISC Information systems TRUE +K02989 small subunit ribosomal protein S5e Ribosome, eukaryotes MISC Information systems TRUE +K02991 small subunit ribosomal protein S6e Ribosome, eukaryotes MISC Information systems TRUE +K02993 small subunit ribosomal protein S7e Ribosome, eukaryotes MISC Information systems TRUE +K02995 small subunit ribosomal protein S8e Ribosome, eukaryotes MISC Information systems TRUE +K02997 small subunit ribosomal protein S9e Ribosome, eukaryotes MISC Information systems TRUE +K02947 small subunit ribosomal protein S10e Ribosome, eukaryotes MISC Information systems TRUE +K02949 small subunit ribosomal protein S11e Ribosome, eukaryotes MISC Information systems TRUE +K02951 small subunit ribosomal protein S12e Ribosome, eukaryotes MISC Information systems TRUE +K02953 small subunit ribosomal protein S13e Ribosome, eukaryotes MISC Information systems TRUE +K02955 small subunit ribosomal protein S14e Ribosome, eukaryotes MISC Information systems TRUE +K02958 small subunit ribosomal protein S15e Ribosome, eukaryotes MISC Information systems TRUE +K02957 small subunit ribosomal protein S15Ae Ribosome, eukaryotes MISC Information systems TRUE +K02960 small subunit ribosomal protein S16e Ribosome, eukaryotes MISC Information systems TRUE +K02962 small subunit ribosomal protein S17e Ribosome, eukaryotes MISC Information systems TRUE +K02964 small subunit ribosomal protein S18e Ribosome, eukaryotes MISC Information systems TRUE +K02966 small subunit ribosomal protein S19e Ribosome, eukaryotes MISC Information systems TRUE 
+K02969 small subunit ribosomal protein S20e Ribosome, eukaryotes MISC Information systems TRUE +K02971 small subunit ribosomal protein S21e Ribosome, eukaryotes MISC Information systems TRUE +K02973 small subunit ribosomal protein S23e Ribosome, eukaryotes MISC Information systems TRUE +K02974 small subunit ribosomal protein S24e Ribosome, eukaryotes MISC Information systems TRUE +K02975 small subunit ribosomal protein S25e Ribosome, eukaryotes MISC Information systems TRUE +K02976 small subunit ribosomal protein S26e Ribosome, eukaryotes MISC Information systems TRUE +K02978 small subunit ribosomal protein S27e Ribosome, eukaryotes MISC Information systems TRUE +K02977 small subunit ribosomal protein S27Ae Ribosome, eukaryotes MISC Information systems TRUE +K02979 small subunit ribosomal protein S28e Ribosome, eukaryotes MISC Information systems TRUE +K02980 small subunit ribosomal protein S29e Ribosome, eukaryotes MISC Information systems TRUE +K02983 small subunit ribosomal protein S30e Ribosome, eukaryotes MISC Information systems TRUE |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/input_annotate1.fasta.gz |
b |
Binary file test-data/input_annotate1.fasta.gz has changed |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/input_distill_rrna1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_distill_rrna1.tabular Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,2 @@ +scaffold fasta begin end strand type e-value note +dataset_4924_scaffold_361 dataset_4924 61952 62058 - 5S rRNA 5.2e-16 |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/input_distill_trna1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_distill_trna1.tabular Sat Dec 10 21:14:28 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +fasta Name tRNA # Begin End Type Codon Score Note +dataset_4924 dataset_4924_scaffold_1814 1 1368 1292 His GTG 73.0 +dataset_4924 dataset_4924_scaffold_1814 2 1285 1214 Gln TTG 60.8 +dataset_4924 dataset_4924_scaffold_1814 3 1182 1110 Lys TTT 77.5 +dataset_4924 dataset_4924_scaffold_361 1 29420 29495 Met CAT 73.6 +dataset_4924 dataset_4924_scaffold_361 2 56462 56380 Leu CAA 63.7 +dataset_4924 dataset_4924_scaffold_361 3 12681 12586 Cys GCA 26.9 pseudo |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/neighborhoods_genes_loc1.fna --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/neighborhoods_genes_loc1.fna Sat Dec 10 21:14:28 2022 +0000 |
[ |
b'@@ -0,0 +1,1038 @@\n+>dataset_5073_scaffold_1510_1 rank: D; Sigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28] (db=pfam)\n+ATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAA\n+>dataset_5073_scaffold_1510_2 rank: D; AMP-binding enzyme [PF00501.31]; AMP-binding enzyme C-terminal domain [PF13193.9] 
(db=pfam)\n+ATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAA\n+>dataset_5073_scaffold_1510_3 rank: D; 2-hydroxyglutaryl-CoA dehydratase, D-component [PF06050.16] 
(db=pfam)\n+ATGACTGACCGTAAAACCATTAAAGAAATCTGTGCACAATTTAAAGAAATTATTGCCGAGCCTGGTCTAAAGATTCAGCGGCTGCAAGCCGAAAAACCTGCTCCGGTAATCGGGTTTTTGCCCACTGACGTGCCTGAAGAATTAATCCATGCCTCGGGCGCTTACCCCTTCGGGCTGGTGGCTTATGATGGATTATGGGTCAACCGGGCCGACGCCCACTTGCAGACCTGGGCATGCTCTCTGGCACGATGTTCCTTCGGGATGTCTCTGGCCGGGAAGTTCGATTACCTGAACGGGCTGATCATCCCTCATATTTGCGACACCACCCGGATGATTTCAGACATTTGGAAACAAAACCGGCCTTACGATTTCATGGAGAACTTTATACTGCCCCGGCAGGTTGATCGTCCCAGTGCCAGGAGTTATCTCACCGGTGAATTGGGCCGGTTGAAGGCGCGTTTGGAGCAGTTTACGGGCAGGTCTAT'..b'CCTTTTCCGGGCTGGACCCTGCCGGGAGTGATCGGGGCGGGAGCCGCCCAGACTATGATGAATATCCACGGCCTAAAGCCGGGCAACCGGGTTTTGATGGTGGGCTCGGGCAACGTGGGGCTGGTGGTGGGGTACCAGCTGCTGCAGGCCGGTTGTCAGCTGGCGGCAGTTATAGACGTTACTTCACGCATCGGCGGCTACGGGGTGCATGCTGCCAAGGTAGCCCGCACCGGTGTACCTTTTCTCATGTCCCATACCATTAAGGAGGCCTGTGGGACCACCCGGGTGGAGGGAGCCACTATTATCCAGGTGGATGGACATTGGCAACCTGTCCCCGGCAGCGAAAAGCACCTGGAAGTAGATACAATCTGCCTGGCGGTGGGCCTGTCTCCCATGTCTCAGCTGGCCAGGATGTCTCACTGCTGGATGGAAAACAACTCCGGCCGGGGGGGGATGGTCCCGATTTGCAATGAATACGGGGAAACCTCTTTGCCCGGCGTGTACGCCGTGGGGGATGTGGCAGGTATTGAAGAGGCCAGTTCGGCCATGATCCAGGGCCGCGTCGCCGGGGCTGCGGTGGCCCGGGCGCGGGGCTACCTGGGTGAGGCTGAGTTTAAGGACCGTTATGGGGATTATCACTCCTCCCTGGGGCAGTTGCGGGAAGGAATGTTTGGACACAAAAATAAAGGGCGTACCGACCTGACTCATACCGAAGAAGGCTATGCGCTTTCCCGGACGCTGCTGGCCAGGGGCTATCTGGCCGAAGAGGAACTTGCGGGTTATTCCGGCGTTTGCTCCGGGGAAAAAAGGAAAAACGGTGTTTTTCCCATTATTGAGTGTACTCAGAACATCCCCTGCAATCCCTGCCGGGACGCTTGCAAGCAGGGCTGTATCAAGGTGAGCGGTAAAATCACCAATCTGCCCGTCGTTGACGAATCGGTTTCCTGTACGGGCTGCGGGATGTGTGTTGTGTCCTGTCCCGGTCAGGCTATCTTCCTGGTTGATGAAAGCTATGCACCGGGCTATGCGGCGGTGTCCATACCTTACGAATTTTACCCCCTGCCGGAGGTGGGGGCCAGGGGTTCGGCCCTGGACCGCTCGGGAGCAGTTGTGGGAGAAGCGGAAGTTATCGGGGTTAAAATCACCAGGGCTATGGACGAGACGGCGGTCCTGACTATGAAAGTACCGCTCGACTGCTCAATGAAGGCAAGATTTTTTAAACCCCTTTAA\n+>dataset_5073_scaffold_988_16 rank: D; Sarcosine oxidase A3 domain [PF17806.4]; BFD-like [2Fe-2S] binding domain [PF04324.18] 
(db=pfam)\n+ATGAGCAAGAGAATTATGCGAACCCCGCCTGAGGGTGAATTTGTAGCCCGGCCGGACGATTCGCTGATCATTTGTCGGTGTGAGGAAATCACCAGGGGAGAAATCAGAAGAGCGATATACGATGGAATGCGCACAATGAATGAAATTAAAAGGTACCTGCGGGTGGGCATGGGGCTCTGCCAGGGACAGACTTGCAATCGCCTGGTTCAGGGCATTATGGCTAAAGAGCTGGGACTGGATCCGGTTGAGGTGGACATCCCTTTATCCCGCTCACCGGCCCGGCCCATTCCTATGAGTGTATACGCCAACGACGGGGTTACAAAAGAAAAAGGTGAAAGAAAATATGAATAA\n+>dataset_5073_scaffold_988_17 rank: D; FAD dependent oxidoreductase [PF01266.27] (db=pfam)\n+ATGAATAAAAAGGAAATTATCATTGTCGGCGGCGGGGTGATCGGCTGCGCCCTGGCCTATTATCTGACCAAGCTAAAAATAAAAGCCCTGGTCATCGAAAAGAATGAGATCGGCATAGAAGCTTCCAGCCGCAACGGCGGCGGGGTCCGGCAGTCGGCCAGGGATTTGCGGGAGATGCCTCTGGCCAGGCATGCCGTGCAAAACCTTTGGCCGGGCCTTTCGGATGAACTGGGAGTTGACGTGGAATACGAGCGGAAAGGGAACCTGCGCCTGGGTAAAACCGAAGAGCATGCCAAAATTCTGGAGCGGATTGTCAGCCAGGGGCGTTCGGCGGGTCTTGACTTAAAACTCATCGACAGGCAAGAGGTGCGGGAAATCTGCCCTTATGCTTCGGAGGAAGTTATGGTGGCCAGCTACTGCCCCACAGACGGGCATGCCAACCCCATGCGGACCACCCTGGCTTTTTACAAAAGGGCCAGAGAAATGGGGGCCGAATTTGTTACCGGGGAAACAGTGCAGTCCATCCTGTTGCGTAAAGGTAAGGTAGGTGGCATTAAAACCGGTGCGGGTACTTATGAATCAGACCAGGTGTTGGTGGCAGCCGGTTTTGCTTCCCGGTTCATCGCTAATTCGGTGGGCATTGACGTGCCCATGCAAAAGGTGCTGGTGGAGGCCCTGGTGACGGGCCAACAGCCTCCCATGTTTCCCCAGATGATCGGTACGGCCGGTTCCGACTTTTACGGTCACCAGACCAAACACGGCTCTTTTGTCTTTGGGGGGATGACCGGCTTGGAACCATTTGCCTCGGAGGAATCCAGGCCTATGACCAGGAACATCACCGCCCCCAGCATCTGCCGGGCCATTCTCGGTTATTTTCCCGTCCTGGATCAGGCTGATATTATCCGCACCTGGTCGGGCTTTCTGGACGTAACAGCCGACCATGTCCCCGTATTAAGCAAGGTGGACGAAATACCGGGGCTGTTCCTGGCCTGCGGTTTCAGCGGACATGGCTACGGTATATCACCTGCTGTGGGACAGGTCATGGCGGAATTGGTGATCCATGACCGGCCGTCGCTGTCCCTGGACGCTTTTCGCTATGACCGCTTTATTCCCAAAAAATAA\n+>dataset_5073_scaffold_988_18 rank: C; 2-iminobutanoate/2-iminopropanoate deaminase [EC:3.5.99.10] 
(db=kegg)\n+ATGAATATGGAGAGAACTAATTATTCGTCCGGGGCACCCCTGGAAGATAAGGCGGGCTACAGCCGGGTGGTCAAGGTGGGGCCCTTCGTATACGTCGGCGGGACCACTTCGGTTCAGCCCGACGGCAGTGTCTACGGGGAAAATGACGGCTATGCCCAGACGAAATACATTTTGGAAAGGATGATTGGTTTTCTGGAGCAGGCCGGGTCCAGGCGGGAGGAAGTAATCAGGGTCAAGATGTATGCCACGGACATGACCCGGGCCAAGGAATACATTGAGGCTTACTCGGAATTTTTTAAGGACATCAAACCCCTGTGTACCCTGGTGGGGATATCCACCCTGTTTCGTCCTGCCCAGCTCATCGAAATAGAAATGGACGCTGTGATTGGATCAGCAAACTAG\n+>dataset_5073_scaffold_988_19 rank: D; Family of unknown function (DUF6144) [PF19641.2] (db=pfam)\n+ATGAATATGCCTATTGACAAGAATAAAAGGTGGATAACAGGGTTGCACGAAAGCATTAACCAACTTGGTAAAGATCTGCAAGCCGCAATAATGAAACCAGTTGGTAAGCAATGCGCTTCAGACCTATTCTCATTATGTGAAAGCTATTTGAGAAATAAAATAGATACAACTGAAGATCTTATAAATGGTTGGAATACTCTTCGGGAGAAACGAAACTTGAAAGGGAAATGGGAACTTGAAGGGGATAAAATCCGAGGCACCTTTTACGAATGTGGTTGTCCACTTATTCGATCGGGAATGATCGATCTTCATCCTATCCAATACTATTGCTCCCAAGGAATGATGGAGATGATTTTTTCCAAAGCAGCGAAAAAAACGGTAAAGGTTGAGATTAAACGTTCGATTGGTTGGGGTGACGATGTATGCGAATTCTTAATAAAACTCTAA\n' |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/neighborhoods_scaffolds_loc1.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/neighborhoods_scaffolds_loc1.fasta Sat Dec 10 21:14:28 2022 +0000 |
b |
b'@@ -0,0 +1,38 @@\n+>dataset_5073_scaffold_1510\n+TACAGGATGCTTTTTTAAACCAGGTTAGAAAGGATAATATACCTGTAACAATCTTTTTGGTTAATGGATTTCAATTAAAAGGTATGGTTAAGGGTTTTGATAATTTTACAGTAATTATGGAAAGTGACGGCAAACAAATGATGGTATATAAACACGCTATTTCTACAGTTAGTCCTATGAAACCGGTAAATACTTCTTTTTCGGAAGTTAAGCCTGGCTAAATAAAAGGTGTAGGTCATGTCTTTTAGTATATTCCACTTAGAAAACATTCCAAATTCAATCATTCCGCCTGGCTTTTCATTGAAAAGCAGGCGGTTTCTATTTTATAGCGCAACTTTGGTATTGATTGGGATGACTATAATAGTCCATAATTACACATATACATAATCATTTAATCAAGCGTGGTTATGTGACTGTAATTCCACTGTTCGTTTGGAATTGAATATACTCTTTGGGATTGAGAAGGGAAGTGAATAAATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAAATGTTTTTTCAAGT
TTAAGTATAAATTAAAAAACTAGGTCAAAGTTGAATGTCATGTTTAACCTTGTCAAAAGTCTTTATTTTAGTACAATAACAGTTTCATATTTGGTTTTGTATGTCAAATTTGACACCCTCAAAATCTGACAATTCTGAAGATCGGGCTGAAAAGATGGTCTAAAAGTTGGCACGTATTTTGCGTAAAATACCATTATCATACATGAATAGAACATTAATGGTTAAGGAGGGAACATCTATGACAGCTTGAGGGAATGCTTTGCCTGTTCTATGGTGGATGTTTTCAGAACCTTCGGACTTGTCAAAAGAGAGGGATATGAAATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAAAATATAAGTCAGAATATAATGGAATATAGGAAAGATAATGGATTATTTAATAAGTTAATCCAAAGCAGCAATGGTGCCAGGTGTTTAAAGGT
TGA'..b'TTGAAGAGGCCAGTTCGGCCATGATCCAGGGCCGCGTCGCCGGGGCTGCGGTGGCCCGGGCGCGGGGCTACCTGGGTGAGGCTGAGTTTAAGGACCGTTATGGGGATTATCACTCCTCCCTGGGGCAGTTGCGGGAAGGAATGTTTGGACACAAAAATAAAGGGCGTACCGACCTGACTCATACCGAAGAAGGCTATGCGCTTTCCCGGACGCTGCTGGCCAGGGGCTATCTGGCCGAAGAGGAACTTGCGGGTTATTCCGGCGTTTGCTCCGGGGAAAAAAGGAAAAACGGTGTTTTTCCCATTATTGAGTGTACTCAGAACATCCCCTGCAATCCCTGCCGGGACGCTTGCAAGCAGGGCTGTATCAAGGTGAGCGGTAAAATCACCAATCTGCCCGTCGTTGACGAATCGGTTTCCTGTACGGGCTGCGGGATGTGTGTTGTGTCCTGTCCCGGTCAGGCTATCTTCCTGGTTGATGAAAGCTATGCACCGGGCTATGCGGCGGTGTCCATACCTTACGAATTTTACCCCCTGCCGGAGGTGGGGGCCAGGGGTTCGGCCCTGGACCGCTCGGGAGCAGTTGTGGGAGAAGCGGAAGTTATCGGGGTTAAAATCACCAGGGCTATGGACGAGACGGCGGTCCTGACTATGAAAGTACCGCTCGACTGCTCAATGAAGGCAAGATTTTTTAAACCCCTTTAAACTATAAAGCAATTTAACCTACTAACTTGGCGTAAGTCTCCCACTTCTATAAGTGGAGTACAACGCCAACTAAGTCATACATTTTGCAGTTCTAAAATTCAGGGAGAGTTGAATCTCCCCCTGAATTAAGAGCTTGCTTCAAAGTTAGCGGAGCCATGGGAACGATTCGAGTGTCCGGGAAGTGTGGCTAAAAAATCGAGGACATTCACCGAAGCGCTGTGTTCCTTTCCTTAACGGAAAAAGACGATGGACCGTTCTTGTGGCGCAGCTTCTTACAAGGAGGCCTGTAATGAGCAAGAGAATTATGCGAACCCCGCCTGAGGGTGAATTTGTAGCCCGGCCGGACGATTCGCTGATCATTTGTCGGTGTGAGGAAATCACCAGGGGAGAAATCAGAAGAGCGATATACGATGGAATGCGCACAATGAATGAAATTAAAAGGTACCTGCGGGTGGGCATGGGGCTCTGCCAGGGACAGACTTGCAATCGCCTGGTTCAGGGCATTATGGCTAAAGAGCTGGGACTGGATCCGGTTGAGGTGGACATCCCTTTATCCCGCTCACCGGCCCGGCCCATTCCTATGAGTGTATACGCCAACGACGGGGTTACAAAAGAAAAAGGTGAAAGAAAATATGAATAAAAAGGAAATTATCATTGTCGGCGGCGGGGTGATCGGCTGCGCCCTGGCCTATTATCTGACCAAGCTAAAAATAAAAGCCCTGGTCATCGAAAAGAATGAGATCGGCATAGAAGCTTCCAGCCGCAACGGCGGCGGGGTCCGGCAGTCGGCCAGGGATTTGCGGGAGATGCCTCTGGCCAGGCATGCCGTGCAAAACCTTTGGCCGGGCCTTTCGGATGAACTGGGAGTTGACGTGGAATACGAGCGGAAAGGGAACCTGCGCCTGGGTAAAACCGAAGAGCATGCCAAAATTCTGGAGCGGATTGTCAGCCAGGGGCGTTCGGCGGGTCTTGACTTAAAACTCATCGACAGGCAAGAGGTGCGGGAAATCTGCCCTTATGCTTCGGAGGAAGTTATGGTGGCCAGCTACTGCCCCACAGACGGGCATGCCAACCCCATGCGGACCACCCTGGCTTTTTACAAAAGGGCCAGAGAAATGGGGGCCGAATTTGTTACCGGGGAAACAGTGCAGTCCATCCTGTTGCGTAAAGGTAAGGTAGGTGGCATTAAAACCGGTGCGGGTACTTATGAATCAGACCAGGTGTTGGTGGCAGCCGGTTTTGCTTCCCGGTTCATCGCTAATTCGGTGGGCATTGACGTGCCCATGCA
AAAGGTGCTGGTGGAGGCCCTGGTGACGGGCCAACAGCCTCCCATGTTTCCCCAGATGATCGGTACGGCCGGTTCCGACTTTTACGGTCACCAGACCAAACACGGCTCTTTTGTCTTTGGGGGGATGACCGGCTTGGAACCATTTGCCTCGGAGGAATCCAGGCCTATGACCAGGAACATCACCGCCCCCAGCATCTGCCGGGCCATTCTCGGTTATTTTCCCGTCCTGGATCAGGCTGATATTATCCGCACCTGGTCGGGCTTTCTGGACGTAACAGCCGACCATGTCCCCGTATTAAGCAAGGTGGACGAAATACCGGGGCTGTTCCTGGCCTGCGGTTTCAGCGGACATGGCTACGGTATATCACCTGCTGTGGGACAGGTCATGGCGGAATTGGTGATCCATGACCGGCCGTCGCTGTCCCTGGACGCTTTTCGCTATGACCGCTTTATTCCCAAAAAATAATCACAAACCACAAACAGGAGGGAAATGTTGATGAATATGGAGAGAACTAATTATTCGTCCGGGGCACCCCTGGAAGATAAGGCGGGCTACAGCCGGGTGGTCAAGGTGGGGCCCTTCGTATACGTCGGCGGGACCACTTCGGTTCAGCCCGACGGCAGTGTCTACGGGGAAAATGACGGCTATGCCCAGACGAAATACATTTTGGAAAGGATGATTGGTTTTCTGGAGCAGGCCGGGTCCAGGCGGGAGGAAGTAATCAGGGTCAAGATGTATGCCACGGACATGACCCGGGCCAAGGAATACATTGAGGCTTACTCGGAATTTTTTAAGGACATCAAACCCCTGTGTACCCTGGTGGGGATATCCACCCTGTTTCGTCCTGCCCAGCTCATCGAAATAGAAATGGACGCTGTGATTGGATCAGCAAACTAGTACTACAGCCGCTTGTCATTGCGAGCGTTAGCCAAGCAATCTTTGGCTGTTACCCCCACTAATTGGGGACATTCCTCCGAAGGAGGTATGTCCCCTTTTTTTAGCCTTATTATTTTATCATGAGCCAGAAGCTTTTTTGAGGAACAACAAACCCTATTGGAACTTTAGGCAAGCTGGACGAGGTCGAAGAACAAGGCCGTGCTGGAAAGTGCTGCGTTCTGCGATGTTATGGCAGATTTAAGGAGCCGTATCCATGCTAAAGTATCCAACTAATATAATCGAAGCTGCAGAACAGGATTTAGCGAGTCTCCCGGAAATTATGAGCAAGAGAAGAATACTGGTAACCTTTAATTAGAGTTTTATTAAGAATTCGCATACATCGTCACCCCAACCAATCGAACGTTTAATCTCAACCTTTACCGTTTTTTTCGCTGCTTTGGAAAAAATCATCTCCATCATTCCTTGGGAGCAATAGTATTGGATAGGATGAAGATCGATCATTCCCGATCGAATAAGTGGACAACCACATTCGTAAAAGGTGCCTCGGATTTTATCCCCTTCAAGTTCCCATTTCCCTTTCAAGTTTCGTTTCTCCCGAAGAGTATTCCAACCATTTATAAGATCTTCAGTTGTATCTATTTTATTTCTCAAATAGCTTTCACATAATGAGAATAGGTCTGAAGCGCATTGCTTACCAACTGGTTTCATTATTGCGGCTTGCAGATCTTTACCAAGTTGGTTAATGCTTTCGTGCAACCCTGTTATCCACCTTTTATTCTTGTCAATAGGCATATTCATAGTTAATTGCTATTCCTCCTTTAGGGCTTGCTTCAAGTCTTGGATAAGATCACCGGCGTTCTCGATTCCTACCGAAAGTCTCAAGGATGAAGAAAAAGATCGTAATAGGTTCACGAGACTACGGTCTTGTTCTACCGGTGGTCAACTGTTTAGATATACGAGTGATCAGCATTTATTAATTAATGTTAAGAACTAATGAAAAGAATGTAAATGTTAAGTTTATAAATTAATTACTCAACTTTCGCAGTTCAAAAATCAGTCTAACCCCTTACAAAATAAGAATTTAGGCCAACTTTTTTAAGCA
AAAACA\n' |
b |
diff -r 000000000000 -r 2675f8d7b2a5 test-data/strainer_input_fasta1.fasta.gz |
b |
Binary file test-data/strainer_input_fasta1.fasta.gz has changed |