view test-data/otu_s1_rps.tab @ 2:fd7104249a3c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
author iuc
date Wed, 21 Aug 2024 13:13:28 +0000
parents bbaa89f070f4
children
line wrap: on
line source

#query_id	query_length	cdd_id	hit_id	evalue	startQ	endQ	frame	description	superkingdom	no rank	family	genus
"ds2020-267_100"	"376"	"pfam02823"	"gnl|CDD|376940"	"3.06167e-09"	"228"	"347"	"-3"	"pfam02823, ATP-synt_DE_N, ATP synthase, Delta/Epsilon chain, beta-sandwich domain.  Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. The subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (pfam00213)."	"Bacteria(0.97);Eukaryota(0.03);"	"(1.00);"	"Lactobacillaceae(0.05);Rhodobacteraceae(0.04);Streptococcaceae(0.03);Bacillaceae(0.03);Burkholderiaceae(0.02);"	"Lactobacillus(0.04);Streptococcus(0.03);Bacillus(0.02);Mycoplasma(0.02);Synechococcus(0.01);"
"ds2020-267_100"	"376"	"pfam00401"	"gnl|CDD|366077"	"8.90041e-05"	"87"	"218"	"-3"	"pfam00401, ATP-synt_DE, ATP synthase, Delta/Epsilon chain, long alpha-helix domain.  Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. This subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (pfam00213)."	"Bacteria(0.97);Eukaryota(0.03);"	"(1.00);"	"(0.06);Clostridiaceae(0.05);Lachnospiraceae(0.05);Bacillaceae(0.04);Peptococcaceae(0.04);"	"(0.06);Clostridium(0.05);Lactobacillus(0.03);Bacillus(0.03);Eubacterium(0.02);"
"ds2020-267_114"	"347"	"pfam00471"	"gnl|CDD|376336"	"8.05888e-12"	"132"	"302"	"3"	"pfam00471, Ribosomal_L33, Ribosomal protein L33.  "	"Bacteria(0.86);Eukaryota(0.14);"	"(1.00);"	"(0.07);Mycoplasmataceae(0.07);Clostridiaceae(0.06);Bacillaceae(0.03);Lactobacillaceae(0.03);"	"Mycoplasma(0.06);Clostridium(0.05);(0.04);Lactobacillus(0.02);Bacillus(0.02);"
"ds2020-267_117"	"344"	"pfam00252"	"gnl|CDD|376306"	"7.27175e-23"	"107"	"295"	"2"	"pfam00252, Ribosomal_L16, Ribosomal protein L16p/L10e.  "	"Bacteria(0.58);Eukaryota(0.29);Archaea(0.13);"	"(1.00);"	"(0.08);Clostridiaceae(0.03);Mycoplasmataceae(0.03);Spirochaetaceae(0.02);"	"(0.04);Clostridium(0.03);Mycoplasma(0.02);"
"ds2020-267_118"	"343"	"pfam00421"	"gnl|CDD|366090"	"7.68219e-41"	"92"	"337"	"-1"	"pfam00421, PSII, Photosystem II protein.  "	"Bacteria(0.79);Eukaryota(0.21);"	"(1.00);"	"Gloeobacteraceae(0.14);Synechococcaceae(0.14);Prochloraceae(0.14);Acaryochloridaceae(0.14);Nostocaceae(0.07);"	"Acaryochloris(0.14);Gloeobacter(0.14);Prochlorococcus(0.14);Synechococcus(0.14);Nostoc(0.07);"
"ds2020-267_120"	"339"	"pfam16639"	"gnl|CDD|374695"	"2.20279e-25"	"197"	"325"	"-3"	"pfam16639, Apocytochr_F_N, Apocytochrome F, N-terminal.  This is the N-terminal domain of cytochrome f. It is a soluble lumen-side domain."	"Bacteria(0.75);Eukaryota(0.25);"	"(1.00);"	"Synechococcaceae(0.25);Gloeobacteraceae(0.07);Prochloraceae(0.07);Aphanothecaceae(0.07);(0.07);"	"Synechococcus(0.21);Prochlorococcus(0.07);Gloeobacter(0.07);Oscillatoria(0.04);Aureococcus(0.04);"
"ds2020-267_130"	"330"	"pfam00680"	"gnl|CDD|366242"	"7.64962e-05"	"124"	"282"	"1"	"pfam00680, RdRP_1, RNA dependent RNA polymerase.  "	"Viruses(1.00);"	"Riboviria(1.00);"	"Caliciviridae(0.30);Picornaviridae(0.30);Secoviridae(0.20);Potyviridae(0.20);"	"Vesivirus(0.20);Aphthovirus(0.10);Sequivirus(0.10);Bymovirus(0.10);Potyvirus(0.10);"
"ds2020-267_139"	"320"	"pfam05860"	"gnl|CDD|368641"	"1.34887e-13"	"167"	"298"	"2"	"pfam05860, Haemagg_act, haemagglutination activity domain.  This domain is suggested to be a carbohydrate- dependent haemagglutination activity site. It is found in a range of haemagglutinins and haemolysins."	"Bacteria(1.00);"	"(1.00);"	"Nostocaceae(0.36);Burkholderiaceae(0.14);Pasteurellaceae(0.14);Pseudomonadaceae(0.12);Neisseriaceae(0.07);"	"Nostoc(0.36);Ralstonia(0.14);Pseudomonas(0.12);Haemophilus(0.10);Neisseria(0.07);"
"ds2020-267_145"	"315"	"pfam02626"	"gnl|CDD|376868"	"3.97676e-05"	"140"	"256"	"-3"	"pfam02626, CT_A_B, Carboxyltransferase domain, subdomain A and B.  Urea carboxylase (UC) catalyzes a two-step, ATP- and biotin-dependent carboxylation reaction of urea. It is composed of biotin carboxylase (BC), carboxyltransferase (CT), and biotin carboxyl carrier protein (BCCP) domains. The CT domain of UC consists of four subdomains, named A, B, C and D. This domain covers the A and B subdomains of the CT domain. This domain covers the whole length of KipA (kinase A) from Bacillus subtilis. It can also be found in S. cerevisiae urea amidolyase Dur1,2, which is a multifunctional biotin-dependent enzyme with domains for urea carboxylase and allophanate (urea carboxylate) hydrolase activity."	"Bacteria(0.86);Eukaryota(0.13);Archaea(0.01);"	"(1.00);"	"Bacillaceae(0.03);Pseudonocardiaceae(0.03);Clostridiaceae(0.03);Corynebacteriaceae(0.03);Streptomycetaceae(0.03);"	"Clostridium(0.03);Corynebacterium(0.03);Pseudomonas(0.02);Streptomyces(0.02);Bacillus(0.02);"
"ds2020-267_16"	"1165"	"pfam02123"	"gnl|CDD|280316"	"1.58664e-38"	"536"	"1078"	"-1"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_168"	no_hit
"ds2020-267_176"	"291"	"pfam01235"	"gnl|CDD|376500"	"1.13408e-29"	"115"	"243"	"1"	"pfam01235, Na_Ala_symp, Sodium:alanine symporter family.  "	"Bacteria(1.00);"	"(1.00);"	"Bacillaceae(0.10);Lachnospiraceae(0.07);Clostridiaceae(0.06);(0.06);Corynebacteriaceae(0.03);"	"Bacillus(0.06);Clostridium(0.06);(0.06);Corynebacterium(0.03);Blautia(0.03);"
"ds2020-267_187"	"287"	"pfam02673"	"gnl|CDD|376891"	"3.59766e-13"	"7"	"156"	"1"	"pfam02673, BacA, Bacitracin resistance protein BacA.  Bacitracin resistance protein (BacA) is a putative undecaprenol kinase. BacA confers resistance to bacitracin, probably by phosphorylation of undecaprenol. More recent studies show that BacA has undecaprenyl pyrophosphate phosphatase activity. Undecaprenyl phosphate is a key lipid intermediate involved in the synthesis of various bacterial cell wall polymers. Bacitracin, a mixture of related cyclic polypeptide antibiotics, is used to treat surface tissue infections. Its primary mode of action is the inhibition of bacterial cell wall synthesis through sequestration of the essential carrier lipid undecaprenyl pyrophosphate, C55-PP, resulting in the loss of cell integrity and lysis. The characteristic phosphatase sequence-motif in this family is likely to be the PGxSRSGG, compared with the PSGH of the PAP family of phosphatases."	"Bacteria(0.97);Archaea(0.03);"	"(1.00);"	"Clostridiaceae(0.08);(0.06);Lachnospiraceae(0.03);Ruminococcaceae(0.02);Bacillaceae(0.02);"	"Clostridium(0.08);(0.04);Bacillus(0.02);Eubacterium(0.02);Prevotella(0.01);"
"ds2020-267_2"	"2436"	"pfam02123"	"gnl|CDD|280316"	"2.17343e-21"	"184"	"1476"	"1"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_20"	"893"	"pfam00006"	"gnl|CDD|376291"	"4.59259e-49"	"498"	"866"	"3"	"pfam00006, ATP-synt_ab, ATP synthase alpha/beta family, nucleotide-binding domain.  This entry includes the ATP synthase alpha and beta subunits, the ATP synthase associated with flagella and the termination factor Rho."	"Bacteria(0.88);Archaea(0.07);Eukaryota(0.05);"	"(1.00);"	"(0.07);Mycoplasmataceae(0.06);Clostridiaceae(0.04);Spirochaetaceae(0.03);Rhodobacteraceae(0.02);"	"Mycoplasma(0.06);(0.03);Clostridium(0.03);Treponema(0.01);Persephonella(0.01);"
"ds2020-267_20"	"893"	"pfam02874"	"gnl|CDD|367225"	"7.30466e-19"	"126"	"329"	"3"	"pfam02874, ATP-synt_ab_N, ATP synthase alpha/beta family, beta-barrel domain.  This family includes the ATP synthase alpha and beta subunits the ATP synthase associated with flagella."	"Bacteria(0.60);Eukaryota(0.28);Archaea(0.13);"	"(1.00);"	"Spirochaetaceae(0.04);Bacillaceae(0.04);Schizosaccharomycetaceae(0.03);Chlamydomonadaceae(0.03);Sulfolobaceae(0.03);"	"Treponema(0.04);Schizosaccharomyces(0.03);Chlamydomonas(0.03);Bacillus(0.03);Thermotoga(0.02);"
"ds2020-267_203"	"281"	"pfam03040"	"gnl|CDD|367312"	"1.76794e-22"	"121"	"231"	"-3"	"pfam03040, CemA, CemA family.  Members of this family are probable integral membrane proteins. Their molecular function is unknown. CemA proteins are found in the inner envelope membrane of chloroplasts but not in the thylakoid membrane. A cyanobacterial member of this family has been implicated in CO2 transport, but is probably not a CO2 transporter itself. They are predicted to be haem-binding however this has not been proven experimentally."	"Bacteria(0.77);Eukaryota(0.23);"	"(1.00);"	"Synechococcaceae(0.17);Aphanothecaceae(0.08);Microcoleaceae(0.06);Chlorellaceae(0.04);Merismopediaceae(0.04);"	"Synechococcus(0.13);Oscillatoria(0.04);Physcomitrella(0.04);Microcystis(0.04);Synechocystis(0.04);"
"ds2020-267_206"	"279"	"pfam03947"	"gnl|CDD|377170"	"2.63457e-16"	"155"	"265"	"2"	"pfam03947, Ribosomal_L2_C, Ribosomal Proteins L2, C-terminal domain.  "	"Bacteria(0.62);Eukaryota(0.27);Archaea(0.11);"	"(1.00);"	"(0.09);Mycoplasmataceae(0.03);Clostridiaceae(0.02);Spirochaetaceae(0.01);Ruminococcaceae(0.01);"	"(0.05);Mycoplasma(0.02);Clostridium(0.02);"
"ds2020-267_21"	"858"	"pfam00680"	"gnl|CDD|366242"	"8.36679e-11"	"295"	"729"	"-1"	"pfam00680, RdRP_1, RNA dependent RNA polymerase.  "	"Viruses(1.00);"	"Riboviria(1.00);"	"Caliciviridae(0.30);Picornaviridae(0.30);Secoviridae(0.20);Potyviridae(0.20);"	"Vesivirus(0.20);Aphthovirus(0.10);Sequivirus(0.10);Bymovirus(0.10);Potyvirus(0.10);"
"ds2020-267_210"	"276"	"pfam17917"	"gnl|CDD|375428"	"1.59337e-13"	"52"	"240"	"1"	"pfam17917, RT_RNaseH, RNase H-like domain found in reverse transcriptase.  DNA polymerase and ribonuclease H (RNase H) activities allow reverse transcriptases to convert the single-stranded retroviral RNA genome into double-stranded DNA, which is integrated into the host chromosome during infection. This entry represents the RNase H like domain."	"unknown"	"unknown"	"unknown"	"unknown"
"ds2020-267_214"	"276"	"pfam07992"	"gnl|CDD|369639"	"2.55939e-07"	"129"	"254"	"3"	"pfam07992, Pyr_redox_2, Pyridine nucleotide-disulphide oxidoreductase.  This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain."	"Bacteria(0.82);Eukaryota(0.09);Archaea(0.09);"	"(1.00);"	"Pseudomonadaceae(0.12);Nocardiaceae(0.07);Thermococcaceae(0.05);Enterobacteriaceae(0.05);Merismopediaceae(0.05);"	"Pseudomonas(0.12);Rhodococcus(0.07);Synechocystis(0.05);Pyrococcus(0.05);Streptococcus(0.04);"
"ds2020-267_218"	"274"	"pfam01348"	"gnl|CDD|279664"	"1.66328e-05"	"51"	"257"	"3"	"pfam01348, Intron_maturas2, Type II intron maturase.  Group II introns use intron-encoded reverse transcriptase, maturase and DNA endonuclease activities for site-specific insertion into DNA. Although this type of intron is self splicing in vitro they require a maturase protein for splicing in vivo. It has been shown that a specific region of the aI2 intron is needed for the maturase function. This region was found to be conserved in group II introns and called domain X."	"Eukaryota(1.00);"	"(1.00);"	"Saccharomycetaceae(0.40);Brassicaceae(0.20);Schizosaccharomycetaceae(0.20);Poaceae(0.20);"	"Saccharomyces(0.40);Hordeum(0.20);Schizosaccharomyces(0.20);Baimashania(0.20);"
"ds2020-267_227"	"272"	"pfam00873"	"gnl|CDD|334294"	"2.32215e-15"	"36"	"245"	"-1"	"pfam00873, ACR_tran, AcrB/AcrD/AcrF family.  Members of this family are integral membrane proteins. Some are involved in drug resistance. AcrB cooperates with a membrane fusion protein, AcrA, and an outer membrane channel TolC. The structure shows the AcrB forms a homotrimer."	"Bacteria(1.00);"	"(1.00);"	"Enterobacteriaceae(0.40);Merismopediaceae(0.20);Pseudomonadaceae(0.13);Helicobacteraceae(0.13);Burkholderiaceae(0.07);"	"Escherichia(0.40);Synechocystis(0.20);Pseudomonas(0.13);Helicobacter(0.13);Cupriavidus(0.07);"
"ds2020-267_230"	"268"	"pfam00115"	"gnl|CDD|376293"	"1.95663e-15"	"81"	"236"	"-3"	"pfam00115, COX1, Cytochrome C and Quinol oxidase polypeptide I.  "	"Bacteria(0.87);Archaea(0.09);Eukaryota(0.04);"	"(1.00);"	"(0.07);Sulfolobaceae(0.03);Flavobacteriaceae(0.03);Bacillaceae(0.02);Burkholderiaceae(0.02);"	"(0.03);Methylobacterium(0.01);Hyphomicrobium(0.01);Rhodanobacter(0.01);"
"ds2020-267_261"	"260"	"pfam01051"	"gnl|CDD|376444"	"1.77523e-19"	"26"	"217"	"-2"	"pfam01051, Rep_3, Initiator Replication protein.  This protein is an initiator of plasmid replication. RepB possesses nicking-closing (topoisomerase I) like activity. It is also able to perform a strand transfer reaction on ssDNA that contains its target. This family also includes RepA which is an E.coli protein involved in plasmid replication. The RepA protein binds to DNA repeats that flank the repA gene."	"Bacteria(0.97);(0.02);"	"(0.99);"	"Streptococcaceae(0.08);Enterobacteriaceae(0.07);Campylobacteraceae(0.07);Lactobacillaceae(0.07);Pasteurellaceae(0.06);"	"Lactococcus(0.07);Campylobacter(0.07);Lactobacillus(0.06);Acinetobacter(0.04);Escherichia(0.04);"
"ds2020-267_268"	"259"	"pfam02123"	"gnl|CDD|280316"	"3.22949e-21"	"18"	"251"	"3"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_274"	"258"	"pfam03713"	"gnl|CDD|367619"	"2.00811e-09"	"24"	"185"	"-2"	"pfam03713, DUF305, Domain of unknown function (DUF305).  Domain found in small family of bacterial secreted proteins with no known function. Also found in Paramecium bursaria chlorella virus 1. This domain is short and found in one or two copies. The domain has a conserved HH motif that may be functionally important. This domain belongs to the ferritin superfamily. It contains two sequence similar repeats each of which is composed of two alpha helices."	"Bacteria(1.00);"	"(1.00);"	"Nocardiaceae(0.13);Mycobacteriaceae(0.13);Pseudonocardiaceae(0.10);Kineosporiaceae(0.08);Micromonosporaceae(0.08);"	"Nocardia(0.13);Mycolicibacterium(0.13);Saccharopolyspora(0.10);Kineococcus(0.08);Streptomyces(0.08);"
"ds2020-267_278"	"258"	"pfam00012"	"gnl|CDD|365808"	"4.1355e-19"	"50"	"232"	"2"	"pfam00012, HSP70, Hsp70 protein.  Hsp70 chaperones help to fold many proteins. Hsp70 assisted folding involves repeated cycles of substrate binding and release. Hsp70 activity is ATP dependent. Hsp70 proteins are made up of two regions: the amino terminus is the ATPase domain and the carboxyl terminus is the substrate binding region."	"Eukaryota(0.54);Bacteria(0.38);Archaea(0.08);"	"(1.00);"	"Saccharomycetaceae(0.12);Schizosaccharomycetaceae(0.08);Enterobacteriaceae(0.08);Hominidae(0.08);Plasmodiidae(0.04);"	"Homo(0.08);Schizosaccharomyces(0.08);Escherichia(0.08);Saccharomyces(0.08);Synechocystis(0.04);"
"ds2020-267_280"	"257"	"pfam01788"	"gnl|CDD|366811"	"2.47901e-09"	"132"	"224"	"-1"	"pfam01788, PsbJ, PsbJ.  This family consists of the photosystem II reaction centre protein PsbJ from plants and Cyanobacteria. In Synechocystis sp. PCC 6803 PsbJ regulates the number of photosystem II centers in thylakoid membranes, it is a predicted 4kDa protein with one membrane spanning domain."	"Bacteria(0.65);Eukaryota(0.35);"	"(1.00);"	"Microcoleaceae(0.12);Prochloraceae(0.08);Oscillatoriaceae(0.08);Synechococcaceae(0.08);Chroococcaceae(0.08);"	"Oscillatoria(0.08);Prochlorococcus(0.08);Synechococcus(0.08);Ectocarpus(0.04);Arenga(0.04);"
"ds2020-267_283"	"257"	"pfam13041"	"gnl|CDD|372443"	"3.148e-06"	"13"	"114"	"1"	"pfam13041, PPR_2, PPR repeat family.  This repeat has no known function. It is about 35 amino acids long and is found in up to 18 copies in some proteins. The family appears to be greatly expanded in plants and fungi. The repeat has been called PPR."	"Eukaryota(1.00);"	"(1.00);"	"Brassicaceae(0.30);Euphorbiaceae(0.25);Salicaceae(0.16);Funariaceae(0.05);Poaceae(0.04);"	"Arabidopsis(0.30);Ricinus(0.25);Populus(0.16);Physcomitrella(0.05);Naegleria(0.03);"
"ds2020-267_287"	"256"	"pfam00115"	"gnl|CDD|376293"	"2.8946e-26"	"13"	"237"	"1"	"pfam00115, COX1, Cytochrome C and Quinol oxidase polypeptide I.  "	"Bacteria(0.87);Archaea(0.09);Eukaryota(0.04);"	"(1.00);"	"(0.07);Sulfolobaceae(0.03);Flavobacteriaceae(0.03);Bacillaceae(0.02);Burkholderiaceae(0.02);"	"(0.03);Methylobacterium(0.01);Hyphomicrobium(0.01);Rhodanobacter(0.01);"
"ds2020-267_312"	"252"	"pfam00585"	"gnl|CDD|278982"	"1.52007e-05"	"29"	"166"	"2"	"pfam00585, Thr_dehydrat_C, C-terminal regulatory domain of Threonine dehydratase.  Threonine dehydratases pfam00291 all contain a carboxy terminal region. This region may have a regulatory role. Some members contain two copies of this region. This family is homologous to the pfam01842 domain."	"Bacteria(0.69);Eukaryota(0.31);"	"(1.00);"	"Solanaceae(0.23);Enterobacteriaceae(0.15);Pasteurellaceae(0.15);Bacillaceae(0.08);Burkholderiaceae(0.08);"	"Solanum(0.23);Haemophilus(0.15);Salmonella(0.15);Mycobacterium(0.08);Saccharomyces(0.08);"
"ds2020-267_315"	"251"	"pfam00989"	"gnl|CDD|366402"	"1.61999e-05"	"32"	"241"	"2"	"pfam00989, PAS, PAS fold.  The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs. The PAS fold appears in archaea, eubacteria and eukarya."	"Bacteria(0.65);Eukaryota(0.35);"	"(1.00);"	"Bacillaceae(0.14);Enterobacteriaceae(0.14);Bradyrhizobiaceae(0.12);Brassicaceae(0.08);Drosophilidae(0.06);"	"Bacillus(0.14);Bradyrhizobium(0.12);Escherichia(0.10);Arabidopsis(0.08);Drosophila(0.06);"
"ds2020-267_316"	"251"	"pfam02123"	"gnl|CDD|280316"	"3.50628e-08"	"28"	"228"	"-3"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_318"	"251"	"pfam00252"	"gnl|CDD|376306"	"5.63854e-12"	"78"	"206"	"-1"	"pfam00252, Ribosomal_L16, Ribosomal protein L16p/L10e.  "	"Bacteria(0.58);Eukaryota(0.29);Archaea(0.13);"	"(1.00);"	"(0.08);Clostridiaceae(0.03);Mycoplasmataceae(0.03);Spirochaetaceae(0.02);"	"(0.04);Clostridium(0.03);Mycoplasma(0.02);"
"ds2020-267_323"	"250"	"pfam00227"	"gnl|CDD|365960"	"5.8155e-09"	"10"	"150"	"-2"	"pfam00227, Proteasome, Proteasome subunit.  The proteasome is a multisubunit structure that degrades proteins. Protein degradation is an essential component of regulation because proteins can become misfolded, damaged, or unnecessary. Proteasomes and their homologs vary greatly in complexity: from HslV (heat shock locus v), which is encoded by 1 gene in bacteria, to the eukaryotic 20S proteasome, which is encoded by more than 14 genes. Recently evidence of two novel groups of bacterial proteasomes was proposed. The first is Anbu, which is sparsely distributed among cyanobacteria and proteobacteria. The second is call beta-proteobacteria proteasome homolog (BPH)."	"Eukaryota(0.68);Archaea(0.21);Bacteria(0.11);"	"(1.00);"	"Drosophilidae(0.11);Rhabditidae(0.07);Schizosaccharomycetaceae(0.07);Unikaryonidae(0.07);Saccharomycetaceae(0.07);"	"Drosophila(0.11);Schizosaccharomyces(0.07);Caenorhabditis(0.07);Saccharomyces(0.07);Encephalitozoon(0.07);"
"ds2020-267_329"	"249"	"pfam13173"	"gnl|CDD|379049"	"4.80752e-08"	"106"	"249"	"1"	"pfam13173, AAA_14, AAA domain.  This family of domains contain a P-loop motif that is characteristic of the AAA superfamily."	"Bacteria(0.78);Archaea(0.22);"	"(1.00);"	"Bacteroidaceae(0.12);Coxiellaceae(0.09);Mycoplasmataceae(0.09);Sulfolobaceae(0.08);Prevotellaceae(0.05);"	"Bacteroides(0.12);Coxiella(0.09);Mycoplasma(0.09);Prevotella(0.05);Sulfurisphaera(0.04);"
"ds2020-267_33"	"680"	"pfam04157"	"gnl|CDD|367847"	"4.86455e-13"	"342"	"494"	"-1"	"pfam04157, EAP30, EAP30/Vps36 family.  This family includes EAP30 as well as the Vps36 protein. Vps36 is involved in Golgi to endosome trafficking. EAP30 is a subunit of the ELL complex. The ELL is an 80-kDa RNA polymerase II transcription factor. ELL interacts with three other proteins to form the complex known as ELL complex. The ELL complex is capable of increasing that catalytic rate of transcription elongation, but is unable to repress initiation of transcription by RNA polymerase II as is the case of ELL. EAP30 is thought to lead to the derepression of ELL's transcriptional inhibitory activity."	"Eukaryota(1.00);"	"(1.00);"	"Saccharomycetaceae(0.06);Debaryomycetaceae(0.05);(0.03);Schizosaccharomycetaceae(0.02);Mamiellaceae(0.02);"	"Candida(0.02);Schizosaccharomyces(0.02);Micromonas(0.02);Leishmania(0.01);Naumovozyma(0.01);"
"ds2020-267_336"	"248"	"pfam00113"	"gnl|CDD|365883"	"4.23282e-13"	"15"	"116"	"-1"	"pfam00113, Enolase_C, Enolase, C-terminal TIM barrel domain.  "	"Eukaryota(0.67);Bacteria(0.33);"	"(1.00);"	"Pleosporaceae(0.11);Bacillaceae(0.11);Anatidae(0.11);Schizosaccharomycetaceae(0.11);Debaryomycetaceae(0.11);"	"Shigella(0.11);Anas(0.11);Bipolaris(0.11);Zea(0.11);Bacillus(0.11);"
"ds2020-267_352"	"245"	"pfam00946"	"gnl|CDD|366381"	"3.23548e-05"	"1"	"141"	"1"	"pfam00946, Mononeg_RNA_pol, Mononegavirales RNA dependent RNA polymerase.  Members of the Mononegavirales including the Paramyxoviridae, like other non-segmented negative strand RNA viruses, have an RNA-dependent RNA polymerase composed of two subunits, a large protein L and a phosphoprotein P. This is a protein family of the L protein. The L protein confers the RNA polymerase activity on the complex. The P protein acts as a transcription factor."	"Viruses(1.00);"	"Riboviria(1.00);"	"Paramyxoviridae(0.44);Rhabdoviridae(0.31);Pneumoviridae(0.12);Filoviridae(0.12);"	"Lyssavirus(0.12);Aquaparamyxovirus(0.06);Rubulavirus(0.06);Respirovirus(0.06);Avulavirus(0.06);"
"ds2020-267_363"	"243"	"pfam00416"	"gnl|CDD|366086"	"2.02528e-05"	"15"	"134"	"-2"	"pfam00416, Ribosomal_S13, Ribosomal protein S13/S18.  This family includes ribosomal protein S13 from prokaryotes and S18 from eukaryotes."	"Eukaryota(0.55);Bacteria(0.27);Archaea(0.18);"	"(1.00);"	"Brassicaceae(0.18);Muridae(0.09);Poaceae(0.09);Drosophilidae(0.09);Mycobacteriaceae(0.09);"	"Arabidopsis(0.18);Geobacillus(0.09);Rattus(0.09);Haloarcula(0.09);Shigella(0.09);"
"ds2020-267_364"	"243"	"pfam00216"	"gnl|CDD|365952"	"1.5507e-10"	"134"	"241"	"-3"	"pfam00216, Bac_DNA_binding, Bacterial DNA-binding protein.  "	"Bacteria(0.95);Eukaryota(0.02);Viruses(0.01);Archaea(0.01);"	"(0.99);Caudovirales(0.01);"	"Tannerellaceae(0.09);Mycoplasmataceae(0.06);Pseudomonadaceae(0.05);Aquificaceae(0.05);Bacteroidaceae(0.05);"	"Parabacteroides(0.09);Mycoplasma(0.05);Pseudomonas(0.05);Bacteroides(0.05);Desulfovibrio(0.04);"
"ds2020-267_365"	no_hit
"ds2020-267_369"	"243"	"pfam12137"	"gnl|CDD|378818"	"6.51052e-05"	"137"	"217"	"-3"	"pfam12137, RapA_C, RNA polymerase recycling family C-terminal.  This domain is found in bacteria. This domain is about 360 amino acids in length. This domain is found associated with pfam00271, pfam00176. The function of this domain is not known, but structurally it forms an alpha-beta fold in nature with a central beta-sheet flanked by helices and loops, the beta-sheet being mainly antiparallel and flanked by four alpha helices, among which the two longer helices exhibit a coiled-coil arrangement."	"Bacteria(1.00);"	"(1.00);"	"Pasteurellaceae(0.14);Alteromonadaceae(0.07);Morganellaceae(0.05);Methylococcaceae(0.05);Shewanellaceae(0.04);"	"(0.04);Marinobacter(0.04);Shewanella(0.04);Pseudomonas(0.03);Psychromonas(0.03);"
"ds2020-267_370"	"242"	"pfam00146"	"gnl|CDD|376297"	"2.41391e-10"	"22"	"111"	"1"	"pfam00146, NADHdh, NADH dehydrogenase.  "	"Bacteria(0.78);Archaea(0.12);Eukaryota(0.10);"	"(1.00);"	"(0.04);Peptococcaceae(0.02);Flavobacteriaceae(0.02);Desulfurococcaceae(0.02);Prevotellaceae(0.02);"	"(0.02);Prevotella(0.02);Bacteroides(0.01);"
"ds2020-267_374"	"242"	"pfam00124"	"gnl|CDD|365890"	"5.09126e-07"	"21"	"125"	"3"	"pfam00124, Photo_RC, Photosynthetic reaction centre protein.  "	"Bacteria(0.95);Eukaryota(0.05);"	"(1.00);"	"Rhodobacteraceae(0.13);Chromatiaceae(0.13);Ectothiorhodospiraceae(0.08);Synechococcaceae(0.05);(0.05);"	"Rhodobacter(0.08);Sphingomonas(0.05);Halorhodospira(0.05);Roseiflexus(0.05);Thioflavicoccus(0.05);"
"ds2020-267_388"	"241"	"pfam02123"	"gnl|CDD|280316"	"6.16383e-08"	"35"	"214"	"-1"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_4"	"2297"	"pfam02123"	"gnl|CDD|280316"	"1.96254e-52"	"824"	"1858"	"-2"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_402"	no_hit
"ds2020-267_404"	"239"	"pfam00361"	"gnl|CDD|366050"	"3.50341e-05"	"70"	"219"	"1"	"pfam00361, Proton_antipo_M, Proton-conducting membrane transporter.  This is a family of membrane transporters that inlcudes some 7 of potentially 14-16 TM regions. In many instances the family forms part of complex I that catalyzes the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane, and in this context is a combination predominantly of subunits 2, 4, 5, 14, L, M and N. In many bacterial species these proteins are probable stand-alone transporters not coupled with oxidoreduction. The family in total represents homologs across the phyla."	"Eukaryota(0.54);Bacteria(0.46);"	"(1.00);"	"Culicidae(0.23);Rhodobacteraceae(0.23);Poaceae(0.15);Enterobacteriaceae(0.15);Phasianidae(0.08);"	"Paracoccus(0.23);Anopheles(0.23);Escherichia(0.15);Zea(0.15);Aspergillus(0.08);"
"ds2020-267_407"	"239"	"pfam00177"	"gnl|CDD|365924"	"1.14399e-06"	"28"	"126"	"1"	"pfam00177, Ribosomal_S7, Ribosomal protein S7p/S5e.  This family contains ribosomal protein S7 from prokaryotes and S5 from eukaryotes."	"Bacteria(0.79);Archaea(0.11);Eukaryota(0.10);"	"(1.00);"	"(0.09);Clostridiaceae(0.04);Mycoplasmataceae(0.03);Spirochaetaceae(0.02);Ruminococcaceae(0.01);"	"(0.04);Clostridium(0.04);Mycoplasma(0.03);Treponema(0.01);Desulfovibrio(0.01);"
"ds2020-267_42"	"575"	"pfam00284"	"gnl|CDD|366000"	"1.02897e-21"	"187"	"300"	"1"	"pfam00284, Cytochrom_B559a, Lumenal portion of Cytochrome b559, alpha (gene psbE) subunit.  This family is the lumenal portion of cytochrome b559 alpha chain, matches to this family should be accompanied by a match to the pfam00283 family also. The Prosite pattern pattern matches the transmembrane region of the cytochrome b559 alpha and beta subunits."	"Bacteria(0.86);Eukaryota(0.14);"	"(1.00);"	"Synechococcaceae(0.17);Microcoleaceae(0.09);Aphanothecaceae(0.09);Prochloraceae(0.06);Phaeodactylaceae(0.03);"	"Synechococcus(0.17);Prochlorococcus(0.06);Stanieria(0.03);Acaryochloris(0.03);Selaginella(0.03);"
"ds2020-267_42"	"575"	"pfam00283"	"gnl|CDD|365999"	"1.54942e-08"	"79"	"165"	"1"	"pfam00283, Cytochrom_B559, Cytochrome b559, alpha (gene psbE) and beta (gene psbF)subunits.  "	"Bacteria(0.59);Eukaryota(0.41);"	"(1.00);"	"Synechococcaceae(0.29);Prochloraceae(0.12);Pseudanabaenaceae(0.06);Acaryochloridaceae(0.06);Thalassiosiraceae(0.06);"	"Synechococcus(0.29);Prochlorococcus(0.12);Selaginella(0.06);Gloeobacter(0.06);Thalassiosira(0.06);"
"ds2020-267_42"	"575"	"pfam00283"	"gnl|CDD|365999"	"2.95472e-07"	"325"	"411"	"1"	"pfam00283, Cytochrom_B559, Cytochrome b559, alpha (gene psbE) and beta (gene psbF)subunits.  "	"Bacteria(0.59);Eukaryota(0.41);"	"(1.00);"	"Synechococcaceae(0.29);Prochloraceae(0.12);Pseudanabaenaceae(0.06);Acaryochloridaceae(0.06);Thalassiosiraceae(0.06);"	"Synechococcus(0.29);Prochlorococcus(0.12);Selaginella(0.06);Gloeobacter(0.06);Thalassiosira(0.06);"
"ds2020-267_427"	no_hit
"ds2020-267_428"	"235"	"pfam00164"	"gnl|CDD|333891"	"1.04166e-24"	"3"	"182"	"3"	"pfam00164, Ribosom_S12_S23, Ribosomal protein S12/S23.  This protein is known as S12 in bacteria and archaea and S23 in eukaryotes."	"Bacteria(0.45);Eukaryota(0.27);Archaea(0.27);"	"(1.00);"	"Thermococcaceae(0.09);Micrococcaceae(0.09);Enterobacteriaceae(0.09);Thermaceae(0.09);Muridae(0.09);"	"Thermococcus(0.09);Saccharomyces(0.09);Rickettsia(0.09);Synechococcus(0.09);Thermus(0.09);"
"ds2020-267_436"	no_hit
"ds2020-267_438"	no_hit
"ds2020-267_444"	no_hit
"ds2020-267_457"	no_hit
"ds2020-267_466"	"230"	"pfam00072"	"gnl|CDD|333815"	"5.42419e-08"	"50"	"208"	"2"	"pfam00072, Response_reg, Response regulator receiver domain.  This domain receives the signal from the sensor partner in bacterial two-component systems. It is usually found N-terminal to a DNA binding effector domain."	"Bacteria(0.94);Eukaryota(0.06);"	"(1.00);"	"Enterobacteriaceae(0.38);Bacillaceae(0.13);Pseudomonadaceae(0.10);Rhodobacteraceae(0.08);Saccharomycetaceae(0.04);"	"Escherichia(0.21);Bacillus(0.13);Salmonella(0.12);Pseudomonas(0.10);Rhodobacter(0.06);"
"ds2020-267_471"	"230"	"pfam00201"	"gnl|CDD|278624"	"3.12575e-07"	"46"	"210"	"1"	"pfam00201, UDPGT, UDP-glucoronosyl and UDP-glucosyl transferase.  "	"Eukaryota(1.00);"	"(1.00);"	"Hominidae(0.46);Muridae(0.31);Leporidae(0.15);Poaceae(0.08);"	"Homo(0.46);Rattus(0.31);Oryctolagus(0.15);Zea(0.08);"
"ds2020-267_486"	"228"	"pfam17035"	"gnl|CDD|374956"	"5.12677e-09"	"108"	"203"	"3"	"pfam17035, BET, Bromodomain extra-terminal - transcription regulation.  The BET, or bromodomain extra-terminal domain, is found on bromodomain proteins that play key roles in development, cancer progression and virus-host pathogenesis. It interacts with NSD3, JMJD6, CHD4, GLTSCR1, and ATAD5 all of which are shown to impart a pTEFb-independent transcriptional activation function on the bromodomain proteins."	"Eukaryota(1.00);"	"(1.00);"	"(0.06);Poaceae(0.05);Brassicaceae(0.05);Saccharomycetaceae(0.05);Musaceae(0.05);"	"Musa(0.05);Glycine(0.03);Populus(0.03);Ichthyophthirius(0.03);Arabidopsis(0.03);"
"ds2020-267_5"	no_hit
"ds2020-267_504"	no_hit
"ds2020-267_506"	"226"	"pfam05173"	"gnl|CDD|377479"	"2.45773e-17"	"93"	"215"	"-3"	"pfam05173, DapB_C, Dihydrodipicolinate reductase, C-terminus.  Dihydrodipicolinate reductase (DapB) reduces the alpha,beta-unsaturated cyclic imine, dihydro-dipicolinate. This reaction is the second committed step in the biosynthesis of L-lysine and its precursor meso-diaminopimelate, which are critical for both protein and cell wall biosynthesis. The C-terminal domain of DapB has been proposed to be the substrate- binding domain."	"Bacteria(0.89);Archaea(0.07);Eukaryota(0.04);"	"(1.00);"	"Clostridiaceae(0.07);(0.06);Flavobacteriaceae(0.04);Lachnospiraceae(0.03);Ruminococcaceae(0.03);"	"Clostridium(0.06);(0.04);Prevotella(0.02);Eubacterium(0.02);Ruminococcus(0.01);"
"ds2020-267_516"	no_hit
"ds2020-267_529"	"224"	"pfam00510"	"gnl|CDD|366140"	"6.14777e-23"	"35"	"211"	"2"	"pfam00510, COX3, Cytochrome c oxidase subunit III.  "	"Eukaryota(1.00);"	"(1.00);"	"Culicidae(0.14);Phasianidae(0.14);Rhabditidae(0.14);Schizosaccharomycetaceae(0.14);Saccharomycetaceae(0.14);"	"Anopheles(0.14);Schizosaccharomyces(0.14);Caenorhabditis(0.14);Gallus(0.14);Neurospora(0.14);"
"ds2020-267_550"	no_hit
"ds2020-267_555"	"222"	"pfam00124"	"gnl|CDD|365890"	"7.71427e-08"	"48"	"203"	"3"	"pfam00124, Photo_RC, Photosynthetic reaction centre protein.  "	"Bacteria(0.95);Eukaryota(0.05);"	"(1.00);"	"Rhodobacteraceae(0.13);Chromatiaceae(0.13);Ectothiorhodospiraceae(0.08);Synechococcaceae(0.05);(0.05);"	"Rhodobacter(0.08);Sphingomonas(0.05);Halorhodospira(0.05);Roseiflexus(0.05);Thioflavicoccus(0.05);"
"ds2020-267_557"	"222"	"pfam00421"	"gnl|CDD|366090"	"3.32623e-20"	"12"	"200"	"-2"	"pfam00421, PSII, Photosystem II protein.  "	"Bacteria(0.79);Eukaryota(0.21);"	"(1.00);"	"Gloeobacteraceae(0.14);Synechococcaceae(0.14);Prochloraceae(0.14);Acaryochloridaceae(0.14);Nostocaceae(0.07);"	"Acaryochloris(0.14);Gloeobacter(0.14);Prochlorococcus(0.14);Synechococcus(0.14);Nostoc(0.07);"
"ds2020-267_558"	"222"	"pfam03737"	"gnl|CDD|377116"	"4.93695e-13"	"57"	"179"	"-2"	"pfam03737, RraA-like, Aldolase/RraA.  Members of this family include regulator of ribonuclease E activity A (RraA) and 4-hydroxy-4-methyl-2-oxoglutarate (HMG)/4-carboxy- 4-hydroxy-2-oxoadipate (CHA) aldolase, also known as RraA-like protein. RraA acts as a trans-acting modulator of RNA turnover, binding essential endonuclease RNase E and inhibiting RNA processing. RraA-like proteins seem to contain aldolase and/or decarboxylase activity either in place of or in addition to the RNase E inhibitor functions."	"Bacteria(0.81);Archaea(0.09);Eukaryota(0.09);"	"(1.00);"	"(0.05);Alcaligenaceae(0.03);Burkholderiaceae(0.03);Saccharomycetaceae(0.03);Streptomycetaceae(0.03);"	"Bacillus(0.03);Corynebacterium(0.03);Streptomyces(0.03);(0.03);Advenella(0.02);"
"ds2020-267_565"	"222"	"pfam03950"	"gnl|CDD|377172"	"9.52435e-10"	"53"	"184"	"-3"	"pfam03950, tRNA-synt_1c_C, tRNA synthetases class I (E and Q), anti-codon binding domain.  Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only glutamyl and glutaminyl tRNA synthetases. In some organisms, a single glutamyl-tRNA synthetase aminoacylates both tRNA(Glu) and tRNA(Gln)."	"Bacteria(0.37);Archaea(0.36);Eukaryota(0.27);"	"(1.00);"	"(0.07);Thermoproteaceae(0.04);Sulfolobaceae(0.04);Methanobacteriaceae(0.04);Desulfurococcaceae(0.04);"	"(0.07);Geobacter(0.02);Caenorhabditis(0.02);Encephalitozoon(0.02);Dictyostelium(0.02);"
"ds2020-267_571"	"221"	"pfam00501"	"gnl|CDD|366135"	"2.61467e-07"	"34"	"201"	"1"	"pfam00501, AMP-binding, AMP-binding enzyme.  "	"Bacteria(0.56);Eukaryota(0.35);Archaea(0.09);"	"(1.00);"	"Bacillaceae(0.12);Streptomycetaceae(0.10);Mycobacteriaceae(0.09);Rhabditidae(0.09);Brassicaceae(0.07);"	"Bacillus(0.12);Streptomyces(0.10);Caenorhabditis(0.09);Mycobacterium(0.07);Archaeoglobus(0.07);"
"ds2020-267_58"	"476"	"pfam03641"	"gnl|CDD|367590"	"7.19579e-14"	"142"	"315"	"-3"	"pfam03641, Lysine_decarbox, Possible lysine decarboxylase.  The members of this family share a highly conserved motif PGGXGTXXE that is probably functionally important. This family includes proteins annotated as lysine decarboxylases, although the evidence for this is not clear."	"Bacteria(0.79);Eukaryota(0.21);"	"(1.00);"	"Brassicaceae(0.17);Pseudomonadaceae(0.12);Mycobacteriaceae(0.08);Caulobacteraceae(0.08);Streptomycetaceae(0.08);"	"Arabidopsis(0.17);Pseudomonas(0.12);Mesorhizobium(0.08);Caulobacter(0.08);Mycobacterium(0.08);"
"ds2020-267_589"	no_hit
"ds2020-267_595"	"219"	"pfam02123"	"gnl|CDD|280316"	"5.90575e-11"	"13"	"210"	"1"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_597"	"219"	"pfam00421"	"gnl|CDD|366090"	"8.10522e-27"	"22"	"207"	"1"	"pfam00421, PSII, Photosystem II protein.  "	"Bacteria(0.79);Eukaryota(0.21);"	"(1.00);"	"Gloeobacteraceae(0.14);Synechococcaceae(0.14);Prochloraceae(0.14);Acaryochloridaceae(0.14);Nostocaceae(0.07);"	"Acaryochloris(0.14);Gloeobacter(0.14);Prochlorococcus(0.14);Synechococcus(0.14);Nostoc(0.07);"
"ds2020-267_599"	"219"	"pfam05717"	"gnl|CDD|377551"	"3.2952e-06"	"28"	"147"	"1"	"pfam05717, TnpB_IS66, IS66 Orf2 like protein.  This protein is found in insertion sequences related to IS66. The function of these proteins is uncertain, but they are probably essential for transposition."	"Bacteria(0.99);"	"(1.00);"	"(0.07);Burkholderiaceae(0.06);Lachnospiraceae(0.05);Rhizobiaceae(0.05);Bacteroidaceae(0.04);"	"(0.06);Bacteroides(0.04);Burkholderia(0.02);Bradyrhizobium(0.02);Sinorhizobium(0.02);"
"ds2020-267_6"	"1860"	"pfam02123"	"gnl|CDD|280316"	"1.35634e-17"	"1147"	"1764"	"-1"	"pfam02123, RdRP_4, Viral RNA-directed RNA-polymerase.  This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus."	"Viruses(1.00);"	"Riboviria(1.00);"	"Totiviridae(0.44);Solemoviridae(0.19);Luteoviridae(0.15);Reoviridae(0.15);Chrysoviridae(0.07);"	"Sobemovirus(0.19);Rotavirus(0.15);Victorivirus(0.15);Polerovirus(0.11);Totivirus(0.11);"
"ds2020-267_610"	"218"	"pfam17759"	"gnl|CDD|380005"	"1.47034e-13"	"25"	"195"	"1"	"pfam17759, tRNA_synthFbeta, Phenylalanyl tRNA synthetase beta chain CLM domain.  This domain corresponds to the catalytic like domain (CLM) in the beta chain of phe tRNA synthetase."	"unknown"	"unknown"	"unknown"	"unknown"
"ds2020-267_622"	"217"	"pfam16363"	"gnl|CDD|379835"	"2.10592e-11"	"29"	"157"	"2"	"pfam16363, GDP_Man_Dehyd, GDP-mannose 4,6 dehydratase.  "	"Bacteria(0.76);Eukaryota(0.19);Archaea(0.05);"	"(1.00);"	"(0.05);Flavobacteriaceae(0.05);Paenibacillaceae(0.02);Spirochaetaceae(0.02);Nocardioidaceae(0.02);"	"Flavobacterium(0.02);Leptospira(0.01);Paenibacillus(0.01);Plasmodium(0.01);Methylobacterium(0.01);"
"ds2020-267_636"	"217"	"pfam13041"	"gnl|CDD|372443"	"7.94482e-18"	"64"	"195"	"-2"	"pfam13041, PPR_2, PPR repeat family.  This repeat has no known function. It is about 35 amino acids long and is found in up to 18 copies in some proteins. The family appears to be greatly expanded in plants and fungi. The repeat has been called PPR."	"Eukaryota(1.00);"	"(1.00);"	"Brassicaceae(0.30);Euphorbiaceae(0.25);Salicaceae(0.16);Funariaceae(0.05);Poaceae(0.04);"	"Arabidopsis(0.30);Ricinus(0.25);Populus(0.16);Physcomitrella(0.05);Naegleria(0.03);"
"ds2020-267_637"	"217"	"pfam07026"	"gnl|CDD|284449"	"1.36077e-13"	"47"	"172"	"2"	"pfam07026, DUF1317, Protein of unknown function (DUF1317).  This family consists of several hypothetical bacterial and phage proteins of around 60 residues in length. The function of this family is unknown."	"Viruses(0.50);Bacteria(0.50);"	"Caudovirales(0.50);(0.50);"	"Siphoviridae(0.50);Morganellaceae(0.50);"	"Photorhabdus(0.50);Lambdavirus(0.50);"
"ds2020-267_639"	"216"	"pfam13631"	"gnl|CDD|379304"	"4.98182e-07"	"2"	"214"	"2"	"pfam13631, Cytochrom_B_N_2, Cytochrome b(N-terminal)/b6/petB.  "	"Bacteria(0.65);Archaea(0.25);Eukaryota(0.10);"	"(1.00);"	"Mycobacteriaceae(0.15);Thermomonosporaceae(0.10);Sulfolobaceae(0.10);Thermoplasmataceae(0.10);Drosophilidae(0.05);"	"Thermomonospora(0.10);Mycobacterium(0.10);Saccharolobus(0.10);Thermoplasma(0.10);Streptomyces(0.05);"
"ds2020-267_641"	"216"	"pfam00804"	"gnl|CDD|366315"	"9.72957e-23"	"23"	"187"	"-3"	"pfam00804, Syntaxin, Syntaxin.  Syntaxins are the prototype family of SNARE proteins. They usually consist of three main regions - a C-terminal transmembrane region, a central SNARE domain which is characteristic of and conserved in all syntaxins (pfam05739), and an N-terminal domain that is featured in this entry. This domain varies between syntaxin isoforms; in syntaxin 1A it is found as three alpha-helices with a left-handed twist. It may fold back on the SNARE domain to allow the molecule to adopt a 'closed' configuration that prevents formation of the core fusion complex - it thus has an auto-inhibitory role. The function of syntaxins is determined by their localization. They are involved in neuronal exocytosis, ER-Golgi transport and Golgi-endosome transport, for example. They also interact with other proteins as well as those involved in SNARE complexes. These include vesicle coat proteins, Rab GTPases, and tethering factors."	"Eukaryota(1.00);"	"(1.00);"	"Adrianichthyidae(0.08);Tetraodontidae(0.08);Brassicaceae(0.08);Poaceae(0.06);Funariaceae(0.06);"	"Oryzias(0.08);Physcomitrella(0.06);Oreochromis(0.06);Selaginella(0.06);Arabidopsis(0.05);"
"ds2020-267_642"	no_hit
"ds2020-267_643"	no_hit
"ds2020-267_65"	"462"	"pfam13406"	"gnl|CDD|372592"	"4.82338e-27"	"145"	"360"	"-1"	"pfam13406, SLT_2, Transglycosylase SLT domain.  This family is related to the SLT domain pfam01464."	"Bacteria(1.00);"	"(1.00);"	"Rhizobiaceae(0.23);Phyllobacteriaceae(0.08);Chromobacteriaceae(0.08);Methylococcaceae(0.08);Legionellaceae(0.08);"	"Sinorhizobium(0.23);Piscirickettsia(0.08);Acinetobacter(0.08);Desulfotalea(0.08);Ruegeria(0.08);"
"ds2020-267_669"	"215"	"pfam17862"	"gnl|CDD|380037"	"4.46873e-06"	"33"	"122"	"-1"	"pfam17862, AAA_lid_3, AAA+ lid domain.  This entry represents the alpha helical AAA+ lid domain that is found to the C-terminus of AAA domains."	"unknown"	"unknown"	"unknown"	"unknown"
"ds2020-267_685"	"214"	"pfam09334"	"gnl|CDD|370442"	"1.80219e-14"	"16"	"117"	"-2"	"pfam09334, tRNA-synt_1g, tRNA synthetases class I (M).  This family includes methionyl tRNA synthetases."	"Bacteria(0.57);Archaea(0.26);Eukaryota(0.17);"	"(1.00);"	"Erwiniaceae(0.09);Methanocaldococcaceae(0.04);Thermoproteaceae(0.04);Streptomycetaceae(0.04);Methanosarcinaceae(0.04);"	"Saccharolobus(0.04);Leptospira(0.04);Wigglesworthia(0.04);Borreliella(0.04);Saccharomyces(0.04);"
"ds2020-267_692"	"214"	"pfam00680"	"gnl|CDD|366242"	"4.79875e-05"	"70"	"180"	"1"	"pfam00680, RdRP_1, RNA dependent RNA polymerase.  "	"Viruses(1.00);"	"Riboviria(1.00);"	"Caliciviridae(0.30);Picornaviridae(0.30);Secoviridae(0.20);Potyviridae(0.20);"	"Vesivirus(0.20);Aphthovirus(0.10);Sequivirus(0.10);Bymovirus(0.10);Potyvirus(0.10);"
"ds2020-267_697"	"214"	"pfam00378"	"gnl|CDD|334046"	"5.08319e-08"	"45"	"170"	"-3"	"pfam00378, ECH_1, Enoyl-CoA hydratase/isomerase.  This family contains a diverse set of enzymes including: enoyl-CoA hydratase, napthoate synthase, carnitate racemase, 3-hydroxybutyryl-CoA dehydratase and dodecanoyl-CoA delta-isomerase."	"Bacteria(0.56);Eukaryota(0.44);"	"(1.00);"	"Bacillaceae(0.33);Hominidae(0.22);Enterobacteriaceae(0.11);Rhabditidae(0.11);Rhodobacteraceae(0.11);"	"Bacillus(0.33);Homo(0.22);Rhodobacter(0.11);Escherichia(0.11);Mus(0.11);"
"ds2020-267_710"	no_hit
"ds2020-267_719"	"213"	"pfam00072"	"gnl|CDD|333815"	"9.1657e-13"	"45"	"185"	"-2"	"pfam00072, Response_reg, Response regulator receiver domain.  This domain receives the signal from the sensor partner in bacterial two-component systems. It is usually found N-terminal to a DNA binding effector domain."	"Bacteria(0.94);Eukaryota(0.06);"	"(1.00);"	"Enterobacteriaceae(0.38);Bacillaceae(0.13);Pseudomonadaceae(0.10);Rhodobacteraceae(0.08);Saccharomycetaceae(0.04);"	"Escherichia(0.21);Bacillus(0.13);Salmonella(0.12);Pseudomonas(0.10);Rhodobacter(0.06);"
"ds2020-267_729"	"212"	"pfam01405"	"gnl|CDD|279713"	"5.43744e-05"	"19"	"99"	"-3"	"pfam01405, PsbT, Photosystem II reaction centre T protein.  The exact function of this protein is unknown. It probably consists of a single transmembrane spanning helix. The Chlamydomonas reinhardtii psbT protein appears to be (i) a novel photosystem II subunit and (ii) required for maintaining optimal photosystem II activity under adverse growth conditions."	"Bacteria(0.73);Eukaryota(0.27);"	"(1.00);"	"Synechococcaceae(0.33);Prochloraceae(0.13);Aphanothecaceae(0.07);Chlamydomonadaceae(0.07);Chamaesiphonaceae(0.07);"	"Synechococcus(0.33);Prochlorococcus(0.13);Aureococcus(0.07);Geitlerinema(0.07);Rippkaea(0.07);"
"ds2020-267_746"	no_hit
"ds2020-267_75"	"425"	"pfam00005"	"gnl|CDD|365804"	"4.26261e-07"	"129"	"275"	"-1"	"pfam00005, ABC_tran, ABC transporter.  ABC transporters for a large family of proteins responsible for translocation of a variety of compounds across biological membranes. ABC transporters are the largest family of proteins in many completely sequenced bacteria. ABC transporters are composed of two copies of this domain and two copies of a transmembrane domain pfam00664. These four domains may belong to a single polypeptide as in CFTR, or belong in different polypeptide chains."	"Bacteria(0.74);Eukaryota(0.26);"	"(1.00);"	"Enterobacteriaceae(0.41);Saccharomycetaceae(0.13);Pseudomonadaceae(0.07);Streptococcaceae(0.06);Bacillaceae(0.04);"	"Escherichia(0.37);Saccharomyces(0.13);Pseudomonas(0.07);Bacillus(0.04);Lactococcus(0.04);"
"ds2020-267_750"	"211"	"pfam02391"	"gnl|CDD|376774"	"1.24642e-05"	"28"	"114"	"-2"	"pfam02391, MoaE, MoaE protein.  This family contains the MoaE protein that is involved in biosynthesis of molybdopterin. Molybdopterin, the universal component of the pterin molybdenum cofactors, contains a dithiolene group serving to bind Mo. Addition of the dithiolene sulfurs to a molybdopterin precursor requires the activity of the converting factor. Converting factor contains the MoaE and MoaD proteins."	"Bacteria(0.72);Eukaryota(0.21);Archaea(0.07);"	"(1.00);"	"(0.04);Bacillaceae(0.03);Mycobacteriaceae(0.02);Burkholderiaceae(0.02);Paenibacillaceae(0.02);"	"Bacillus(0.02);(0.01);Drosophila(0.01);Paenibacillus(0.01);Mycobacterium(0.01);"
"ds2020-267_763"	no_hit
"ds2020-267_773"	"210"	"pfam01641"	"gnl|CDD|376583"	"5.23903e-34"	"16"	"174"	"1"	"pfam01641, SelR, SelR domain.  Methionine sulfoxide reduction is an important process, by which cells regulate biological processes and cope with oxidative stress. MsrA, a protein involved in the reduction of methionine sulfoxides in proteins, has been known for four decades and has been extensively characterized with respect to structure and function. However, recent studies revealed that MsrA is only specific for methionine-S-sulfoxides. Because oxidized methionines occur in a mixture of R and S isomers in vivo, it was unclear how stereo-specific MsrA could be responsible for the reduction of all protein methionine sulfoxides. It appears that a second methionine sulfoxide reductase, SelR, evolved that is specific for methionine-R-sulfoxides, the activity that is different but complementary to that of MsrA. Thus, these proteins, working together, could reduce both stereoisomers of methionine sulfoxide. This domain is found both in SelR proteins and fused with the peptide methionine sulfoxide reductase enzymatic domain pfam01625. The domain has two conserved cysteine and histidines. The domain binds both selenium and zinc. The final cysteine is found to be replaced by the rare amino acid selenocysteine in some members of the family. This family has methionine-R-sulfoxide reductase activity."	"Bacteria(0.79);Eukaryota(0.16);Archaea(0.05);"	"(1.00);"	"Flavobacteriaceae(0.05);(0.04);Saccharomycetaceae(0.02);Vibrionaceae(0.01);Spirochaetaceae(0.01);"	"(0.02);Mycoplasma(0.01);Vibrio(0.01);Corynebacterium(0.01);"
"ds2020-267_8"	"1703"	"pfam00680"	"gnl|CDD|366242"	"2.85682e-13"	"685"	"1458"	"-3"	"pfam00680, RdRP_1, RNA dependent RNA polymerase.  "	"Viruses(1.00);"	"Riboviria(1.00);"	"Caliciviridae(0.30);Picornaviridae(0.30);Secoviridae(0.20);Potyviridae(0.20);"	"Vesivirus(0.20);Aphthovirus(0.10);Sequivirus(0.10);Bymovirus(0.10);Potyvirus(0.10);"
"ds2020-267_811"	"208"	"pfam07991"	"gnl|CDD|285265"	"1.80927e-08"	"20"	"190"	"-1"	"pfam07991, IlvN, Acetohydroxy acid isomeroreductase, NADPH-binding domain.  Acetohydroxy acid isomeroreductase catalyzes the conversion of acetohydroxy acids into dihydroxy valerates. This reaction is the second in the synthetic pathway of the essential branched side chain amino acids valine and isoleucine. This N-terminal region of the enzyme carries the binding-site for NADPH. The active-site for enzymatic activity lies in the C-terminal part, IlvC, pfam01450."	"Bacteria(0.76);Archaea(0.24);"	"(1.00);"	"Bacillaceae(0.07);Helicobacteraceae(0.05);Sulfolobaceae(0.05);Bartonellaceae(0.02);Leptospiraceae(0.02);"	"Bacillus(0.07);Thermus(0.02);Tropheryma(0.02);Corynebacterium(0.02);Pyrococcus(0.02);"
"ds2020-267_817"	"208"	"pfam05656"	"gnl|CDD|377540"	"3.45664e-06"	"86"	"190"	"-1"	"pfam05656, DUF805, Protein of unknown function (DUF805).  This family consists of several bacterial proteins of unknown function."	"Bacteria(1.00);"	"(1.00);"	"Veillonellaceae(0.07);Sutterellaceae(0.06);Sphingomonadaceae(0.05);Rhodobacteraceae(0.04);Caulobacteraceae(0.04);"	"Veillonella(0.04);Sphingomonas(0.04);Asticcacaulis(0.03);Dakarella(0.03);Prevotella(0.03);"
"ds2020-267_837"	"207"	"pfam04061"	"gnl|CDD|367791"	"2.43363e-18"	"1"	"159"	"1"	"pfam04061, ORMDL, ORMDL family.  Evidence form suggests that ORMDLs are involved in protein folding in the ER. Orm proteins have been identified as negative regulators of sphingolipid synthesis that form a conserved complex with serine palmitoyltransferase, the first and rate-limiting enzyme in sphingolipid production. This novel and conserved protein complex, has been termed the SPOTS complex (serine palmitoyltransferase, Orm1/2, Tsc3, and Sac1)."	"Eukaryota(1.00);"	"(1.00);"	"Saccharomycetaceae(0.15);Nosematidae(0.04);(0.04);Phaffomycetaceae(0.03);Salpingoecidae(0.03);"	"Kazachstania(0.04);Thalassiosira(0.03);Trichomonas(0.03);Nosema(0.03);Nakaseomyces(0.03);"
"ds2020-267_94"	no_hit
"ds2020-267_97"	"380"	"pfam04879"	"gnl|CDD|368171"	"1.9903e-08"	"125"	"274"	"-2"	"pfam04879, Molybdop_Fe4S4, Molybdopterin oxidoreductase Fe4S4 domain.  This domain is found in formate dehydrogenase H for which the structure is known. This first domain (residues 1 to 60) of Structure 1aa6 is an Fe4S4 cluster just below the protein surface."	"Bacteria(0.75);Archaea(0.25);"	"(1.00);"	"Enterobacteriaceae(0.11);Bacillaceae(0.09);Pseudomonadaceae(0.08);Methanobacteriaceae(0.06);Phyllobacteriaceae(0.06);"	"Bacillus(0.09);Escherichia(0.09);Pseudomonas(0.08);Mesorhizobium(0.06);Synechococcus(0.06);"
"ds2020-267_98"	"379"	"pfam16203"	"gnl|CDD|374428"	"1.33948e-30"	"131"	"280"	"-1"	"pfam16203, ERCC3_RAD25_C, ERCC3/RAD25/XPB C-terminal helicase.  This is the C-terminal helicase domain of ERCC3, RAD25 and XPB helicases."	"Eukaryota(1.00);"	"(1.00);"	"Cryptosporidiidae(0.06);Vahlkampfiidae(0.06);(0.03);Opisthorchiidae(0.03);Chaetomiaceae(0.03);"	"Naegleria(0.06);Cryptosporidium(0.06);Micromonas(0.03);Batrachochytrium(0.03);Caenorhabditis(0.03);"